Morphological variation of amazon and sailfin mollies
In this study, I am looking at various populations of amazon and sailfin mollies across their native/introduced range to assess morphological variation both within and among the species. I am using the Pickle fish collections for my samples.
I will use all data first to see the trends, then filter for confirmed adult sizes (see “Filtered”).
Data collection
## Using libcurl 8.3.0 with Schannel
I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.
Conclusions: literally all of them are NOT normal… will log transform them and double check normality. Had to square-root three traits () because still not normal with log transform.
shapiro.test(raw1$SL)
##
## Shapiro-Wilk normality test
##
## data: raw1$SL
## W = 0.9763, p-value = 2.763e-05
shapiro.test(raw1$BD)
##
## Shapiro-Wilk normality test
##
## data: raw1$BD
## W = 0.96287, p-value = 1.787e-07
shapiro.test(raw1$CPD)
##
## Shapiro-Wilk normality test
##
## data: raw1$CPD
## W = 0.96431, p-value = 2.908e-07
shapiro.test(raw1$CPL)
##
## Shapiro-Wilk normality test
##
## data: raw1$CPL
## W = 0.97288, p-value = 6.831e-06
shapiro.test(raw1$PreDL)
##
## Shapiro-Wilk normality test
##
## data: raw1$PreDL
## W = 0.97924, p-value = 9.997e-05
shapiro.test(raw1$DbL)
##
## Shapiro-Wilk normality test
##
## data: raw1$DbL
## W = 0.97697, p-value = 3.68e-05
shapiro.test(raw1$HL)
##
## Shapiro-Wilk normality test
##
## data: raw1$HL
## W = 0.94955, p-value = 3.057e-09
shapiro.test(raw1$HD)
##
## Shapiro-Wilk normality test
##
## data: raw1$HD
## W = 0.96761, p-value = 9.28e-07
shapiro.test(raw1$HW)
##
## Shapiro-Wilk normality test
##
## data: raw1$HW
## W = 0.97201, p-value = 4.833e-06
shapiro.test(raw1$SnL)
##
## Shapiro-Wilk normality test
##
## data: raw1$SnL
## W = 0.65042, p-value < 2.2e-16
shapiro.test(raw1$OL)
##
## Shapiro-Wilk normality test
##
## data: raw1$OL
## W = 0.98631, p-value = 0.003102
hist(raw1$SL)
hist(raw1$BD)
hist(raw1$CPD)
hist(raw1$CPL)
hist(raw1$PreDL)
hist(raw1$DbL)
hist(raw1$HL)
hist(raw1$HD)
hist(raw1$HW)
hist(raw1$SnL)
hist(raw1$OL)
qqnorm(raw1$SL)
qqline(raw1$SL)
qqnorm(raw1$BD)
qqline(raw1$BD)
qqnorm(raw1$CPD)
qqline(raw1$CPD)
qqnorm(raw1$CPL)
qqline(raw1$CPL)
qqnorm(raw1$PreDL)
qqline(raw1$PreDL)
qqnorm(raw1$DbL)
qqline(raw1$DbL)
qqnorm(raw1$HL)
qqline(raw1$HL)
qqnorm(raw1$HD)
qqline(raw1$HD)
qqnorm(raw1$HW)
qqline(raw1$HW)
qqnorm(raw1$SnL)
qqline(raw1$SnL)
qqnorm(raw1$OL)
qqline(raw1$OL)
Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.
raw1[paste0(names(raw1), '_log')] <- log(raw1[, 26:35], 10)
#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.
raw1 <- raw1[-c(37:60)]
raw1 <- raw1[-c(48)]
#double checking normality with a SW test and QQ plot
shapiro.test(raw1$SL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$SL_log
## W = 0.99271, p-value = 0.1056
shapiro.test(raw1$BD_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$BD_log
## W = 0.82598, p-value < 2.2e-16
shapiro.test(raw1$CPD_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$CPD_log
## W = 0.99285, p-value = 0.1144
shapiro.test(raw1$CPL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$CPL_log
## W = 0.99513, p-value = 0.3824
shapiro.test(raw1$PreDL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$PreDL_log
## W = 0.93554, p-value = 8.204e-11
shapiro.test(raw1$DbL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$DbL_log
## W = 0.98042, p-value = 0.0001711
shapiro.test(raw1$HL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$HL_log
## W = 0.99386, p-value = 0.1984
shapiro.test(raw1$HD_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$HD_log
## W = 0.99661, p-value = 0.7102
shapiro.test(raw1$HW_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$HW_log
## W = 0.99491, p-value = 0.3435
shapiro.test(raw1$SnL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$SnL_log
## W = 0.99366, p-value = 0.1788
shapiro.test(raw1$OL_log)
##
## Shapiro-Wilk normality test
##
## data: raw1$OL_log
## W = 0.99271, p-value = 0.1056
qqnorm(raw1$SL_log)
qqline(raw1$SL_log)
qqnorm(raw1$BD_log)
qqline(raw1$BD_log)
qqnorm(raw1$CPD_log)
qqline(raw1$CPD_log)
qqnorm(raw1$CPL_log)
qqline(raw1$CPL_log)
qqnorm(raw1$PreDL_log)
qqline(raw1$PreDL_log)
qqnorm(raw1$DbL_log)
qqline(raw1$DbL_log)
qqnorm(raw1$HL_log)
qqline(raw1$HL_log)
qqnorm(raw1$HD_log)
qqline(raw1$HD_log)
qqnorm(raw1$HW_log)
qqline(raw1$HW_log)
qqnorm(raw1$SnL_log)
qqline(raw1$SnL_log)
qqnorm(raw1$OL_log)
qqline(raw1$OL_log)
Need to try other transformations for body depth, predorsal length, and dorsal base length. Will try squareroot or cuberoot to see what gets them normal. Cubed got them closest to normal (bd still not normal but other two are).
#raw1$sqR_BD ='^'(raw1$BD,1/2)
#raw1$sqR_PreDL ='^'(raw1$PreDL,1/2)
#raw1$sqR_DbL ='^'(raw1$DbL,1/2)
#shapiro.test(raw1$sqR_BD) #didn't work
#shapiro.test(raw1$sqR_PreDL)#worked
#shapiro.test(raw1$sqR_DbL)#worked
#qqnorm(raw1$sqR_BD)
#qqline(raw1$sqR_BD)
#qqnorm(raw1$sqR_PreDL)
#qqline(raw1$sqR_PreDL)
#qqnorm(raw1$sqR_DbL)
#qqline(raw1$sqR_PreDL)
#cubed root
raw1$cbR_BD ='^'(raw1$BD,1/3)
raw1$cbR_PreDL ='^'(raw1$PreDL,1/3)
raw1$cbR_DbL ='^'(raw1$DbL,1/3)
shapiro.test(raw1$cbR_BD) #didn't work, but closest to normal
##
## Shapiro-Wilk normality test
##
## data: raw1$cbR_BD
## W = 0.9892, p-value = 0.01468
shapiro.test(raw1$cbR_PreDL)#worked
##
## Shapiro-Wilk normality test
##
## data: raw1$cbR_PreDL
## W = 0.9939, p-value = 0.2027
shapiro.test(raw1$cbR_DbL)#worked
##
## Shapiro-Wilk normality test
##
## data: raw1$cbR_DbL
## W = 0.99582, p-value = 0.5246
qqnorm(raw1$cbR_BD)
qqline(raw1$cbR_BD)
qqnorm(raw1$cbR_PreDL)
qqline(raw1$cbR_PreDL)
qqnorm(raw1$cbR_DbL)
qqline(raw1$cbR_PreDL)
#raw1 <- raw1[-c(48:50)]
#need to cube root SL to use in regressions
raw1$cbR_SL ='^'(raw1$SL,1/3)
Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.
Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales above and below lateral line (except in mexicana), scales before dorsal fin and fluctuating asymmetry; all other traits influenced by body size.
library(ggplot2)
library(ggpubr)
lat <- raw1[raw1$SPP == "p.latipinna",]
form <- raw1[raw1$SPP == "p.formosa",]
mex <- raw1[raw1$SPP == "p.mexicana",]
##### LAT #####
reg.lat.D <- lm(lat$D ~ lat$SL)
sd.lat.D <- rstandard(reg.lat.D)
reg.lat.D.plot <- ggplot(lat, aes(x = SL, y = D)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.P1 <- lm(lat$P1 ~ lat$SL)
sd.lat.P1 <- rstandard(reg.lat.P1)
reg.lat.P1.plot <- ggplot(lat, aes(x = SL, y = P1)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.P2.L <- lm(lat$P2.L ~ lat$SL)
sd.lat.P2.L <- rstandard(reg.lat.P2.L)
reg.lat.P2.L.plot <- ggplot(lat, aes(x = SL, y = P2.L)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.P2.R <- lm(lat$P2.R ~ lat$SL)
sd.lat.P2.R <- rstandard(reg.lat.P2.R)
reg.lat.P2.R.plot <- ggplot(lat, aes(x = SL, y = P2.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.A <- lm(lat$A ~ lat$SL)
sd.lat.A <- rstandard(reg.lat.A)
reg.lat.A.plot <- ggplot(lat, aes(x = SL, y = A)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.P1.R <- lm(lat$P1.R ~ lat$SL)
sd.lat.P1.R <- rstandard(reg.lat.P1.R)
reg.lat.P1.R.plot <- ggplot(lat, aes(x = SL, y = P1.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.LLSC <- lm(lat$LLSC ~ lat$SL)
sd.lat.LLSC <- rstandard(reg.lat.LLSC)
reg.lat.LLSC.plot <- ggplot(lat, aes(x = SL, y = LLSC)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.SALL <- lm(lat$SALL ~ lat$SL)
sd.lat.SALL <- rstandard(reg.lat.SALL)
reg.lat.SALL.plot <- ggplot(lat, aes(x = SL, y = SALL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.SBLL <- lm(lat$SBLL ~ lat$SL)
sd.lat.SBLL <- rstandard(reg.lat.SBLL)
reg.lat.SBLL.plot <- ggplot(lat, aes(x = SL, y = SBLL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.SBDF <- lm(lat$SBDF ~ lat$SL)
sd.lat.SBDF <- rstandard(reg.lat.SBDF)
reg.lat.SBDF.plot <- ggplot(lat, aes(x = SL, y = SBDF)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.BD <- lm(lat$cbR_BD ~ lat$cbR_SL)
sd.lat.BD <- rstandard(reg.lat.BD)
reg.lat.BD.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_BD)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.CPD <- lm(lat$CPD_log ~ lat$SL_log)
sd.lat.CPD <- rstandard(reg.lat.CPD)
reg.lat.CPD.plot <- ggplot(lat, aes(x = SL_log, y = CPD_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.CPL <- lm(lat$CPL_log ~ lat$SL_log)
sd.lat.CPL <- rstandard(reg.lat.CPL)
reg.lat.CPL.plot <- ggplot(lat, aes(x = SL_log, y = CPL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.PreDL <- lm(lat$cbR_PreDL ~ lat$cbR_SL)
sd.lat.PreDL <- rstandard(reg.lat.PreDL)
reg.lat.PreDL.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_PreDL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.DbL <- lm(lat$cbR_DbL ~ lat$cbR_SL)
sd.lat.DbL <- rstandard(reg.lat.DbL)
reg.lat.DbL.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_DbL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.HL <- lm(lat$HL_log ~ lat$SL_log)
sd.lat.HL <- rstandard(reg.lat.HL)
reg.lat.HL.plot <- ggplot(lat, aes(x = SL_log, y = HL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.HD <- lm(lat$HD_log ~ lat$SL_log)
sd.lat.HD <- rstandard(reg.lat.HD)
reg.lat.HD.plot <- ggplot(lat, aes(x = SL_log, y = HD_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.HW <- lm(lat$HW_log ~ lat$SL_log)
sd.lat.HW <- rstandard(reg.lat.HW)
reg.lat.HW.plot <- ggplot(lat, aes(x = SL_log, y = HW_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.SnL <- lm(lat$SnL_log ~ lat$SL_log)
sd.lat.SnL <- rstandard(reg.lat.SnL)
reg.lat.SnL.plot <- ggplot(lat, aes(x = SL_log, y = SnL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.OL <- lm(lat$OL_log ~ lat$SL_log)
sd.lat.OL <- rstandard(reg.lat.OL)
reg.lat.OL.plot <- ggplot(lat, aes(x = SL_log, y = OL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.lat.FLA <- lm(lat$FLA ~ lat$SL)
sd.lat.FLA <- rstandard(reg.lat.FLA)
reg.lat.FLA.plot <- ggplot(lat, aes(x = SL, y = FLA)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'
##### FORM #####
reg.form.D <- lm(form$D ~ form$SL)
sd.form.D <- rstandard(reg.form.D)
reg.form.D.plot <- ggplot(form, aes(x =SL, y = D)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.P1 <- lm(form$P1 ~ form$SL)
sd.form.P1 <- rstandard(reg.form.P1)
reg.form.P1.plot <- ggplot(form, aes(x = SL, y = P1)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.P2.L <- lm(form$P2.L ~ form$SL)
sd.form.P2.L <- rstandard(reg.form.P2.L)
reg.form.P2.L.plot <- ggplot(form, aes(x = SL, y = P2.L)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.P2.R <- lm(form$P2.R ~ form$SL)
sd.form.P2.R <- rstandard(reg.form.P2.R)
reg.form.P2.R.plot <- ggplot(form, aes(x = SL, y = P2.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.A <- lm(form$A ~ form$SL)
sd.form.A <- rstandard(reg.form.A)
reg.form.A.plot <- ggplot(form, aes(x = SL, y = A)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.P1.R <- lm(form$P1.R ~ form$SL)
sd.form.P1.R <- rstandard(reg.form.P1.R)
reg.form.P1.R.plot <- ggplot(form, aes(x = SL, y = P1.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.LLSC <- lm(form$LLSC ~ form$SL)
sd.form.LLSC <- rstandard(reg.form.LLSC)
reg.form.LLSC.plot <- ggplot(form, aes(x = SL, y = LLSC)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.SALL <- lm(form$SALL ~ form$SL)
sd.form.SALL <- rstandard(reg.form.SALL)
reg.form.SALL.plot <- ggplot(form, aes(x = SL, y = SALL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.SBLL <- lm(form$SBLL ~ form$SL)
sd.form.SBLL <- rstandard(reg.form.SBLL)
reg.form.SBLL.plot <- ggplot(form, aes(x = SL, y = SBLL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.SBDF <- lm(form$SBDF ~ form$SL)
sd.form.SBDF <- rstandard(reg.form.SBDF)
reg.form.SBDF.plot <- ggplot(form, aes(x = SL, y = SBDF)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.BD <- lm(form$cbR_BD ~ form$cbR_SL)
sd.form.BD <- rstandard(reg.form.BD)
reg.form.BD.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_BD)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.CPD <- lm(form$CPD_log ~ form$SL_log)
sd.form.CPD <- rstandard(reg.form.CPD)
reg.form.CPD.plot <- ggplot(form, aes(x = SL_log, y = CPD_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.CPL <- lm(form$CPL_log ~ form$SL_log)
sd.form.CPL <- rstandard(reg.form.CPL)
reg.form.CPL.plot <- ggplot(form, aes(x = SL_log, y = CPL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.PreDL <- lm(form$cbR_PreDL ~ form$cbR_SL)
sd.form.PreDL <- rstandard(reg.form.PreDL)
reg.form.PreDL.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_PreDL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.DbL <- lm(form$cbR_DbL ~ form$cbR_SL)
sd.form.DbL <- rstandard(reg.form.DbL)
reg.form.DbL.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_DbL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.HL <- lm(form$HL_log ~ form$SL_log)
sd.form.HL <- rstandard(reg.form.HL)
reg.form.HL.plot <- ggplot(form, aes(x = SL_log, y = HL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.HD <- lm(form$HD_log ~ form$SL_log)
sd.form.HD <- rstandard(reg.form.HD)
reg.form.HD.plot <- ggplot(form, aes(x = SL_log, y = HD_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.HW <- lm(form$HW_log ~ form$SL_log)
sd.form.HW <- rstandard(reg.form.HW)
reg.form.HW.plot <- ggplot(form, aes(x = SL_log, y = HW_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.SnL <- lm(form$SnL_log ~ form$SL_log)
sd.form.SnL <- rstandard(reg.form.SnL)
reg.form.SnL.plot <- ggplot(form, aes(x = SL_log, y = SnL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.OL <- lm(form$OL_log ~ form$SL_log)
sd.form.OL <- rstandard(reg.form.OL)
reg.form.OL.plot <- ggplot(form, aes(x = SL_log, y = OL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.form.FLA <- lm(form$FLA ~ form$SL)
sd.form.FLA <- rstandard(reg.form.FLA)
reg.form.FLA.plot <- ggplot(form, aes(x = SL, y = FLA)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'
##### MEX #####
reg.mex.D <- lm(mex$D ~ mex$SL)
sd.mex.D <- rstandard(reg.mex.D)
reg.mex.D.plot <- ggplot(mex, aes(x = SL, y = D)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.P1 <- lm(mex$P1 ~ mex$SL)
sd.mex.P1 <- rstandard(reg.mex.P1)
reg.mex.P1.plot <- ggplot(mex, aes(x = SL, y = P1)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.P2.L <- lm(mex$P2.L ~ mex$SL)
sd.mex.P2.L <- rstandard(reg.mex.P2.L)
reg.mex.P2.L.plot <- ggplot(mex, aes(x = SL, y = P2.L)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.P2.R <- lm(mex$P2.R ~ mex$SL)
sd.mex.P2.R <- rstandard(reg.mex.P2.R)
reg.mex.P2.R.plot <- ggplot(mex, aes(x = SL, y = P2.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.A <- lm(mex$A ~ mex$SL)
sd.mex.A <- rstandard(reg.mex.A)
reg.mex.A.plot <- ggplot(mex, aes(x = SL, y = A)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.P1.R <- lm(mex$P1.R ~ mex$SL)
sd.mex.P1.R <- rstandard(reg.mex.P1.R)
reg.mex.P1.R.plot <- ggplot(mex, aes(x = SL, y = P1.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.LLSC <- lm(mex$LLSC ~ mex$SL)
sd.mex.LLSC <- rstandard(reg.mex.LLSC)
reg.mex.LLSC.plot <- ggplot(mex, aes(x = SL, y = LLSC)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.SALL <- lm(mex$SALL ~ mex$SL)
sd.mex.SALL <- rstandard(reg.mex.SALL)
reg.mex.SALL.plot <- ggplot(mex, aes(x = SL, y = SALL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.SBLL <- lm(mex$SBLL ~ mex$SL)
sd.mex.SBLL <- rstandard(reg.mex.SBLL)
reg.mex.SBLL.plot <- ggplot(mex, aes(x = SL, y = SBLL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.SBDF <- lm(mex$SBDF ~ mex$SL)
sd.mex.SBDF <- rstandard(reg.mex.SBDF)
reg.mex.SBDF.plot <- ggplot(mex, aes(x = SL, y = SBDF)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.BD <- lm(mex$cbR_BD ~ mex$cbR_SL)
sd.mex.BD <- rstandard(reg.mex.BD)
reg.mex.BD.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_BD)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.CPD <- lm(mex$CPD_log ~ mex$SL_log)
sd.mex.CPD <- rstandard(reg.mex.CPD)
reg.mex.CPD.plot <- ggplot(mex, aes(x = SL_log, y = CPD_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.CPL <- lm(mex$CPL_log ~ mex$SL_log)
sd.mex.CPL <- rstandard(reg.mex.CPL)
reg.mex.CPL.plot <- ggplot(mex, aes(x = SL_log, y = CPL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.PreDL <- lm(mex$cbR_PreDL ~ mex$cbR_SL)
sd.mex.PreDL <- rstandard(reg.mex.PreDL)
reg.mex.PreDL.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_PreDL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.DbL <- lm(mex$cbR_DbL ~ mex$cbR_SL)
sd.mex.DbL <- rstandard(reg.mex.DbL)
reg.mex.DbL.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_DbL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.HL <- lm(mex$HL_log ~ mex$SL_log)
sd.mex.HL <- rstandard(reg.mex.HL)
reg.mex.HL.plot <- ggplot(mex, aes(x = SL_log, y = HL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.HD <- lm(mex$HD_log ~ mex$SL_log)
sd.mex.HD <- rstandard(reg.mex.HD)
reg.mex.HD.plot <- ggplot(mex, aes(x = SL_log, y = HD_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.HW <- lm(mex$HW_log ~ mex$SL_log)
sd.mex.HW <- rstandard(reg.mex.HW)
reg.mex.HW.plot <- ggplot(mex, aes(x = SL_log, y = HW_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.SnL <- lm(mex$SnL_log ~ mex$SL_log)
sd.mex.SnL <- rstandard(reg.mex.SnL)
reg.mex.SnL.plot <- ggplot(mex, aes(x = SL_log, y = SnL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.OL <- lm(mex$OL_log ~ mex$SL_log)
sd.mex.OL <- rstandard(reg.mex.OL)
reg.mex.OL.plot <- ggplot(mex, aes(x = SL_log, y = OL_log)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'
reg.mex.FLA <- lm(mex$FLA ~ mex$SL)
sd.mex.FLA <- rstandard(reg.mex.FLA)
reg.mex.FLA.plot <- ggplot(mex, aes(x = SL, y = FLA)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'
STEP TWO: get residuals for each individual for traits that were influenced by body size
STEP THREE: convert residuals to absolute value
##### LAT #####
abs.lat.D <- abs(res.lat.D)
mean(abs.lat.D)
## [1] 0.5375505
abs.lat.P1 <- abs(res.lat.P1)
mean(abs.lat.P1)
## [1] 0.5466667
abs.lat.P1.R <- abs(res.lat.P1.R)
mean(abs.lat.P1.R)
## [1] 0.5799393
abs.lat.LLSC <- abs(res.lat.LLSC)
mean(abs.lat.LLSC)
## [1] 0.7664044
abs.lat.SBLL <- abs(res.lat.SBLL)
mean(abs.lat.SBLL)
## [1] 0.2461336
abs.lat.BD <- abs(res.lat.BD)
mean(abs.lat.BD)
## [1] 0.04778326
abs.lat.CPD <- abs(res.lat.CPD)
mean(abs.lat.CPD)
## [1] 0.03695739
abs.lat.CPL <- abs(res.lat.CPL)
mean(abs.lat.CPL)
## [1] 0.03732998
abs.lat.PreDL <- abs(res.lat.PreDL)
mean(abs.lat.PreDL)
## [1] 0.02707836
abs.lat.DbL <- abs(res.lat.DbL)
mean(abs.lat.DbL)
## [1] 0.0516366
abs.lat.HL <- abs(res.lat.HL)
mean(abs.lat.HL)
## [1] 0.03588147
abs.lat.HD <- abs(res.lat.HD)
mean(abs.lat.HD)
## [1] 0.03506907
abs.lat.HW <- abs(res.lat.HW)
mean(abs.lat.HW)
## [1] 0.03208151
abs.lat.SnL <- abs(res.lat.SnL)
mean(abs.lat.SnL)
## [1] 0.03208694
abs.lat.OL <- abs(res.lat.OL)
mean(abs.lat.OL)
## [1] 1.416081e-17
##### FORM #####
abs.form.D <- abs(res.form.D)
mean(abs.form.D)
## [1] 0.5668177
abs.form.P1 <- abs(res.form.P1)
mean(abs.form.P1)
## [1] 0.4843616
abs.form.P1.R <- abs(res.form.P1.R)
mean(abs.form.P1.R)
## [1] 0.4242033
abs.form.LLSC <- abs(res.form.LLSC)
mean(abs.form.LLSC)
## [1] 0.9038801
abs.form.SBLL <- abs(res.form.SBLL)
mean(abs.form.SBLL)
## [1] 0.3399272
abs.form.BD <- abs(res.form.BD)
mean(abs.form.BD)
## [1] 0.04710005
abs.form.CPD <- abs(res.form.CPD)
mean(abs.form.CPD)
## [1] 0.03046568
abs.form.CPL <- abs(res.form.CPL)
mean(abs.form.CPL)
## [1] 0.03738523
abs.form.PreDL <- abs(res.form.PreDL)
mean(abs.form.PreDL)
## [1] 0.02618636
abs.form.DbL <- abs(res.form.DbL)
mean(abs.form.DbL)
## [1] 0.05067905
abs.form.HL <- abs(res.form.HL)
mean(abs.form.HL)
## [1] 0.03919057
abs.form.HD <- abs(res.form.HD)
mean(abs.form.HD)
## [1] 0.03507274
abs.form.HW <- abs(res.form.HW)
mean(abs.form.HW)
## [1] 0.03744209
abs.form.SnL <- abs(res.form.SnL)
mean(abs.form.SnL)
## [1] 0.03321741
abs.form.OL <- abs(res.form.OL)
mean(abs.form.OL)
## [1] 6.018764e-18
##### MEX #####
abs.mex.D <- abs(res.mex.D)
mean(abs.mex.D)
## [1] 0.1657275
abs.mex.P1 <- abs(res.mex.P1)
mean(abs.mex.P1)
## [1] 0.5686425
abs.mex.P1.R <- abs(res.mex.P1.R)
mean(abs.mex.P1.R)
## [1] 0.458723
abs.mex.LLSC <- abs(res.mex.LLSC)
mean(abs.mex.LLSC)
## [1] 0.4434954
abs.mex.SBLL <- abs(res.mex.SBLL)
mean(abs.mex.SBLL)
## [1] 0.2713231
abs.mex.BD <- abs(res.mex.BD)
mean(abs.mex.BD)
## [1] 0.04568114
abs.mex.CPD <- abs(res.mex.CPD)
mean(abs.mex.CPD)
## [1] 0.03277006
abs.mex.CPL <- abs(res.mex.CPL)
mean(abs.mex.CPL)
## [1] 0.03878753
abs.mex.PreDL <- abs(res.mex.PreDL)
mean(abs.mex.PreDL)
## [1] 0.06019307
abs.mex.DbL <- abs(res.mex.DbL)
mean(abs.mex.DbL)
## [1] 0.04686893
abs.mex.HL <- abs(res.mex.HL)
mean(abs.mex.HL)
## [1] 0.05562721
abs.mex.HD <- abs(res.mex.HD)
mean(abs.mex.HD)
## [1] 0.03668815
abs.mex.HW <- abs(res.mex.HW)
mean(abs.mex.HW)
## [1] 0.03882769
abs.mex.SnL <- abs(res.mex.SnL)
mean(abs.mex.SnL)
## [1] 0.04900589
abs.mex.OL <- abs(res.mex.OL)
mean(abs.mex.OL)
## [1] 4.001035e-18
#let's get this into the raw1 data set so that we can plot this more easily
abs.res.D <- c(abs.lat.D, abs.form.D, abs.mex.D)
abs.res.P1 <- c(abs.lat.P1, abs.form.P1, abs.mex.P1)
abs.res.P1.R <- c(abs.lat.P1.R, abs.form.P1.R, abs.mex.P1.R)
abs.res.LLSC<- c(abs.lat.LLSC, abs.form.LLSC, abs.mex.LLSC)
abs.res.SBLL<- c(abs.lat.SBLL, abs.form.SBLL, abs.mex.SBLL)
abs.res.BD<- c(abs.lat.BD, abs.form.BD, abs.mex.BD)
abs.res.CPD<- c(abs.lat.CPD, abs.form.CPD, abs.mex.CPD)
abs.res.CPL<- c(abs.lat.CPL, abs.form.CPL, abs.mex.CPL)
abs.res.PreDL <- c(abs.lat.PreDL, abs.form.PreDL, abs.mex.PreDL)
abs.res.DbL <- c(abs.lat.DbL, abs.form.DbL, abs.mex.DbL)
abs.res.HL<- c(abs.lat.HL, abs.form.HL, abs.mex.HL)
abs.res.HD<- c(abs.lat.HD, abs.form.HD, abs.mex.HD)
abs.res.HW <- c(abs.lat.HW, abs.form.HW, abs.mex.HW)
abs.res.SnL <- c(abs.lat.SnL, abs.form.SnL, abs.mex.SnL)
abs.res.OL <- c(abs.lat.OL, abs.form.OL, abs.mex.OL)
raw2 <- cbind(raw1, abs.res.D, abs.res.P1, abs.res.P1.R, abs.res.LLSC, abs.res.SBLL, abs.res.BD, abs.res.CPD, abs.res.CPL, abs.res.PreDL, abs.res.DbL, abs.res.HL, abs.res.HD, abs.res.HW, abs.res.SnL, abs.res.OL)
library(ggbeeswarm)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
ggplot(raw2, aes(SPP, abs.res.D)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
scatter_violin <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
geom_violin(trim = FALSE) +
stat_summary(
fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black"
)
print(scatter_violin)
scatter_violin1 <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
geom_violin(trim = FALSE) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)
print(scatter_violin1)
ggplot(raw2, aes(SPP, abs.res.P1)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.P1.R)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.LLSC)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.SBLL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.BD)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.CPD)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.CPL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.PreDL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.DbL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.HL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.HD)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.HW)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.SnL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(raw2, aes(SPP, abs.res.OL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).
F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.
Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().
- will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.
##
## Welch Two Sample t-test
##
## data: abs.lat.BD and abs.form.BD
## t = 0.16042, df = 287.17, p-value = 0.8727
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007699582 0.009066010
## sample estimates:
## mean of x mean of y
## 0.04778326 0.04710005
##
## Welch Two Sample t-test
##
## data: abs.lat.CPD and abs.form.CPD
## t = 2.1468, df = 282.5, p-value = 0.03266
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0005394989 0.0124439196
## sample estimates:
## mean of x mean of y
## 0.03695739 0.03046568
##
## Welch Two Sample t-test
##
## data: abs.lat.CPL and abs.form.CPL
## t = -0.015652, df = 291.31, p-value = 0.9875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007002590 0.006892088
## sample estimates:
## mean of x mean of y
## 0.03732998 0.03738523
##
## Welch Two Sample t-test
##
## data: abs.lat.PreDL and abs.form.PreDL
## t = 0.37319, df = 279.25, p-value = 0.7093
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.003813117 0.005597125
## sample estimates:
## mean of x mean of y
## 0.02707836 0.02618636
##
## Welch Two Sample t-test
##
## data: abs.lat.DbL and abs.form.DbL
## t = 0.19254, df = 284.63, p-value = 0.8475
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.00883155 0.01074664
## sample estimates:
## mean of x mean of y
## 0.05163660 0.05067905
##
## Welch Two Sample t-test
##
## data: abs.lat.HL and abs.form.HL
## t = -0.90175, df = 296.34, p-value = 0.3679
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.010530987 0.003912795
## sample estimates:
## mean of x mean of y
## 0.03588147 0.03919057
##
## Welch Two Sample t-test
##
## data: abs.lat.HD and abs.form.HD
## t = -0.00099329, df = 267.38, p-value = 0.9992
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007275651 0.007268313
## sample estimates:
## mean of x mean of y
## 0.03506907 0.03507274
##
## Welch Two Sample t-test
##
## data: abs.lat.HW and abs.form.HW
## t = -1.6871, df = 297.98, p-value = 0.09263
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0116134871 0.0008923157
## sample estimates:
## mean of x mean of y
## 0.03208151 0.03744209
##
## Welch Two Sample t-test
##
## data: abs.lat.SnL and abs.form.SnL
## t = -0.38886, df = 295.99, p-value = 0.6977
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.006851803 0.004590862
## sample estimates:
## mean of x mean of y
## 0.03208694 0.03321741
##
## Welch Two Sample t-test
##
## data: abs.lat.OL and abs.form.OL
## t = 1.318, df = 152.51, p-value = 0.1895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.062607e-18 2.034670e-17
## sample estimates:
## mean of x mean of y
## 1.416081e-17 6.018764e-18
Gonna try F-tests for funsies. Would be interesting if both are varying, but in different ways (amazon with identical variance around best fit, whereas lat with more variation around best fit for example).
##
## F test to compare two variances
##
## data: abs.lat.BD and abs.form.BD
## F = 0.95859, num df = 133, denom df = 165, p-value = 0.8025
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.695102 1.329540
## sample estimates:
## ratio of variances
## 0.9585916
##
## F test to compare two variances
##
## data: abs.lat.CPD and abs.form.CPD
## F = 1.039, num df = 133, denom df = 165, p-value = 0.8121
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7533916 1.4410320
## sample estimates:
## ratio of variances
## 1.038977
##
## F test to compare two variances
##
## data: abs.lat.CPL and abs.form.CPL
## F = 0.88034, num df = 133, denom df = 165, p-value = 0.4448
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6383612 1.2210102
## sample estimates:
## ratio of variances
## 0.8803423
##
## F test to compare two variances
##
## data: abs.lat.PreDL and abs.form.PreDL
## F = 1.0927, num df = 133, denom df = 165, p-value = 0.5868
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7923178 1.5154870
## sample estimates:
## ratio of variances
## 1.092659
##
## F test to compare two variances
##
## data: abs.lat.DbL and abs.form.DbL
## F = 1.003, num df = 133, denom df = 165, p-value = 0.9809
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7272902 1.3911071
## sample estimates:
## ratio of variances
## 1.002981
##
## F test to compare two variances
##
## data: abs.lat.HL and abs.form.HL
## F = 0.75566, num df = 133, denom df = 165, p-value = 0.09293
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5479473 1.0480733
## sample estimates:
## ratio of variances
## 0.7556556
##
## F test to compare two variances
##
## data: abs.lat.HD and abs.form.HD
## F = 1.2869, num df = 133, denom df = 165, p-value = 0.1238
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.9331834 1.7849244
## sample estimates:
## ratio of variances
## 1.286922
##
## F test to compare two variances
##
## data: abs.lat.HW and abs.form.HW
## F = 0.64101, num df = 133, denom df = 165, p-value = 0.007901
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4648142 0.8890623
## sample estimates:
## ratio of variances
## 0.6410095
##
## F test to compare two variances
##
## data: abs.lat.SnL and abs.form.SnL
## F = 0.76714, num df = 133, denom df = 165, p-value = 0.1118
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5562779 1.0640074
## sample estimates:
## ratio of variances
## 0.7671441
##
## F test to compare two variances
##
## data: abs.lat.OL and abs.form.OL
## F = 11.033, num df = 133, denom df = 165, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 8.000672 15.303095
## sample estimates:
## ratio of variances
## 11.03346
Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.
Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).
raw3 <- raw2[raw2$SPP !="p.mexicana", ]
glm_D <- glm(abs.res.D~SPP, data=raw3)
print(summary(glm_D), show.residuals=TRUE)
##
## Call:
## glm(formula = abs.res.D ~ SPP, data = raw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5474 -0.3920 -0.1006 0.3170 1.6061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.56682 0.03151 17.989 <2e-16 ***
## SPPp.latipinna -0.02927 0.04715 -0.621 0.535
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1648051)
##
## Null deviance: 49.175 on 299 degrees of freedom
## Residual deviance: 49.112 on 298 degrees of freedom
## AIC: 314.46
##
## Number of Fisher Scoring iterations: 2
glm_P1 <- glm(abs.res.P1~SPP, data=raw3)
print(summary(glm_P1), show.residuals=TRUE)
##
## Call:
## glm(formula = abs.res.P1 ~ SPP, data = raw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5006 -0.3420 -0.1328 0.2626 2.9146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.48436 0.03362 14.406 <2e-16 ***
## SPPp.latipinna 0.06231 0.05031 1.238 0.217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1876585)
##
## Null deviance: 56.210 on 299 degrees of freedom
## Residual deviance: 55.922 on 298 degrees of freedom
## AIC: 353.42
##
## Number of Fisher Scoring iterations: 2
glm_P1.R <- glm(abs.res.P1.R~SPP, data=raw3)
print(summary(glm_P1.R), show.residuals=TRUE)
##
## Call:
## glm(formula = abs.res.P1.R ~ SPP, data = raw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5705 -0.2933 -0.1676 0.1833 1.6538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.42420 0.03055 13.887 < 2e-16 ***
## SPPp.latipinna 0.15574 0.04571 3.407 0.000746 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1548898)
##
## Null deviance: 47.955 on 299 degrees of freedom
## Residual deviance: 46.157 on 298 degrees of freedom
## AIC: 295.84
##
## Number of Fisher Scoring iterations: 2
glm_LLSC <- glm(abs.res.LLSC~SPP, data=raw3)
print(summary(glm_LLSC), show.residuals=TRUE)
##
## Call:
## glm(formula = abs.res.LLSC ~ SPP, data = raw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9022 -0.4792 -0.2428 0.4210 3.5649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.90388 0.05395 16.755 <2e-16 ***
## SPPp.latipinna -0.13748 0.08072 -1.703 0.0896 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.4830762)
##
## Null deviance: 145.36 on 299 degrees of freedom
## Residual deviance: 143.96 on 298 degrees of freedom
## AIC: 637.08
##
## Number of Fisher Scoring iterations: 2
This will be performed on traits that DID vary with SL.
(MW_D <- wilcox.test(abs.res.D~SPP, data=raw3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.D by SPP
## W = 12186, p-value = 0.1545
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.05689062 0.19840357
## sample estimates:
## difference in location
## 0.1030553
(MW_P1 <- wilcox.test(abs.res.P1~SPP, data=raw3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.P1 by SPP
## W = 8765, p-value = 0.001606
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.23328614 -0.07245187
## sample estimates:
## difference in location
## -0.1618195
(MW_P1R <- wilcox.test(abs.res.P1.R~SPP, data=raw3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.P1.R by SPP
## W = 7363, p-value = 4.86e-07
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.2596445 -0.1543936
## sample estimates:
## difference in location
## -0.2082393
(MW_LLSC <- wilcox.test(abs.res.LLSC~SPP, data=raw3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.LLSC by SPP
## W = 12119, p-value = 0.1822
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.03418245 0.19226705
## sample estimates:
## difference in location
## 0.0763304
I did notice that the deviance residual information was out of whack (median not super close to zero in many cases, the max and min VERY different). In the EXTRAS rscript, I ran a DHARMa test to see what the issue was, and apparently they fail the levene’s test for homogeneity of variance. This indicates that while the AVERAGE variance is not different between the two species, there could be a different in how the species are varying, which may be interesting (similar to the F-tests of the continuous residuals). Will run some levene’s test for these discrete residuals, just to see.
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(carData)
(L_D <- leveneTest(abs.res.D~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 9.8483 0.00187 **
## 298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(L_P1 <- leveneTest(abs.res.P1~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 13.447 0.0002906 ***
## 298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(L_P1R <- leveneTest(abs.res.P1.R~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 3.306 0.07003 .
## 298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(L_LLSC <- leveneTest(abs.res.LLSC~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.2242 0.1369
## 298
EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.
This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.
var.left.pel <- var.test(lat$P2.L, form$P2.L, alternative = "two.sided")
var.left.pel
##
## F test to compare two variances
##
## data: lat$P2.L and form$P2.L
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## NaN NaN
## sample estimates:
## ratio of variances
## NaN
ggplot(raw2, aes(SPP, P2.L)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
var.rig.pel <- var.test(lat$P2.R, form$P2.R, alternative = "two.sided")
var.rig.pel
##
## F test to compare two variances
##
## data: lat$P2.R and form$P2.R
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## NaN NaN
## sample estimates:
## ratio of variances
## NaN
ggplot(raw2, aes(SPP, P2.R)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
var.anal <- var.test(lat$A, form$A, alternative = "two.sided")
var.anal
##
## F test to compare two variances
##
## data: lat$A and form$A
## F = 1.1322, num df = 133, denom df = 165, p-value = 0.4474
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8210231 1.5703926
## sample estimates:
## ratio of variances
## 1.132245
ggplot(raw2, aes(SPP, A)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
var.sca.ab.ll <- var.test(lat$SALL, form$SALL, alternative = "two.sided")
var.sca.ab.ll
##
## F test to compare two variances
##
## data: lat$SALL and form$SALL
## F = 1.625, num df = 133, denom df = 165, p-value = 0.003095
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.178316 2.253797
## sample estimates:
## ratio of variances
## 1.624976
ggplot(raw2, aes(SPP, SALL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
var.sca.df <- var.test(lat$SBDF, form$SBDF, alternative = "two.sided")
var.sca.df
##
## F test to compare two variances
##
## data: lat$SBDF and form$SBDF
## F = 0.76031, num df = 133, denom df = 165, p-value = 0.1003
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5513254 1.0545347
## sample estimates:
## ratio of variances
## 0.7603142
ggplot(raw2, aes(SPP, SBDF)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
var.flu.asy <- var.test(lat$FLA, form$FLA, alternative = "two.sided")
var.flu.asy
##
## F test to compare two variances
##
## data: lat$FLA and form$FLA
## F = 0.73321, num df = 133, denom df = 165, p-value = 0.06289
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5316692 1.0169377
## sample estimates:
## ratio of variances
## 0.733207
ggplot(raw2, aes(SPP, FLA)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).
(LT_P2L <- leveneTest(P2.L~SPP, data=raw3)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 NaN NaN
## 298
(LT_P2R <- leveneTest(P2.R~SPP, data=raw3)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 NaN NaN
## 298
(LT_A <- leveneTest(A~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.3962 0.5295
## 298
(LT_SALL <- leveneTest(SALL~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.8627 0.3537
## 298
(LT_SBLL <- leveneTest(SBLL~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 3.36 0.0678 .
## 298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(LT_SBDF <- leveneTest(SBDF~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0081 0.9283
## 298
(LT_FLA <- leveneTest(FLA~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.7164 0.1004
## 298
WHY WON’T THIS TABLE WORK!!!!!!!!
average.table <- matrix(c(MW_D\(p.value, mean(abs.lat.D, na.rm = TRUE), mean(abs.form.D, na.rm = TRUE), MW_P1\)p.value, mean(abs.lat.P1, na.rm = TRUE), mean(abs.form.P1, na.rm = TRUE), LT_P2R\("Pr(>F)", mean(lat\)P2.R, na.rm = TRUE), mean(form\(P2.R, na.rm = TRUE), LT_P2R\)“Pr(>F)”, mean(lat\(P2.L, na.rm = TRUE), mean(form\)P2.L, na.rm = TRUE), LT_A\("Pr(>F)", mean(lat\)A, na.rm = TRUE), mean(form\(A, na.rm = TRUE), MW_P1R\)p.value, mean(abs.lat.P1.R, na.rm = TRUE), mean(abs.form.P1.R, na.rm = TRUE), MW_LLSC\(p.value, mean(abs.lat.LLSC, na.rm = TRUE), mean(abs.form.LLSC, na.rm = TRUE), LT_SALL\)“Pr(>F)”, mean(lat\(SALL, na.rm = TRUE), mean(form\)SALL, na.rm = TRUE), LT_SBLL\("Pr(>F)", mean(abs.lat.SBLL, na.rm = TRUE), mean(abs.form.SBLL, na.rm = TRUE), LT_SBDF\)“Pr(>F)”, mean(lat\(SBDF, na.rm = TRUE), mean(form\)SBDF, na.rm = TRUE), ttest.abs.BD\(p.value, mean(abs.lat.BD, na.rm = TRUE), mean(abs.form.BD, na.rm = TRUE), ttest.abs.CPD\)p.value, mean(abs.lat.CPD, na.rm = TRUE), mean(abs.form.CPD, na.rm = TRUE), ttest.abs.CPL\(p.value, mean(abs.lat.CPL, na.rm = TRUE), mean(abs.form.CPL, na.rm = TRUE), ttest.abs.PreDL\)p.value, mean(abs.lat.PreDL, na.rm = TRUE), mean(abs.form.PreDL, na.rm = TRUE), ttest.abs.DbL\(p.value, mean(abs.lat.DbL, na.rm = TRUE), mean(abs.form.DbL, na.rm = TRUE), ttest.abs.HL\)p.value, mean(abs.lat.HL, na.rm = TRUE), mean(abs.form.HL, na.rm = TRUE), ttest.abs.HD\(p.value, mean(abs.lat.HD, na.rm = TRUE), mean(abs.form.HD, na.rm = TRUE), ttest.abs.HW\)p.value, mean(abs.lat.HW, na.rm = TRUE), mean(abs.form.HW, na.rm = TRUE), ttest.abs.SnL\(p.value, mean(abs.lat.SnL, na.rm = TRUE), mean(abs.form.SnL, na.rm = TRUE), ttest.abs.OL\)p.value, mean(abs.lat.OL, na.rm = TRUE), mean(abs.form.OL, na.rm = TRUE), LT_FLA\("Pr(>F)", mean(lat\)FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)
colnames(average.table)<- c(“p-value for variance”, “lat mean”, “form mean”)
rownames(average.table) <- c(“dorsal rays”, “L pect rays”, “R pelvic rays”, ”L pelvic rays”, “Anal rays”, “R pect rays”, ”lat line scales”, ”scales above LL”, ”scales below LL”, ”scales before dor”, “body dep”, “peduncle dep”, “peduncle leng”, “pre-dorsal”, “dorsal base”, “head length”, “head depth”, “head width”, “snout leng”, “orbital leng”, “fluct. asymmetries”)
average.table
plot_multi_histogram <- function(df, feature, label_column) {
plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
geom_density(alpha=0.2) +
labs(x=feature, y = "Density")
plt + guides(fill=guide_legend(title=label_column))
}
plot_multi_histogram(raw3, "abs.res.CPD", "SPP")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot_multi_histogram(raw3, "abs.res.DbL", "SPP")
plot_multi_histogram(raw3, "abs.res.P1", "SPP")
plot_multi_histogram(raw3, "abs.res.P1.R", "SPP")
LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.
In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.
(chart <- matrix(c("Variable chart:", "D: dorsal ray count", "P1: left pectoral ray count", "P2.R: right pelvic rays", "P2.L: left pelvic rays", "A: anal rays", "P1.R: right pectoral ray count", "LLSC: lateral line scale count", "SALL: scales above lateral line", "SBLL: scales below lateral line", "SBDF: scales before dorsal fin", "TL: total length", "SL: standard length", "BD: body depth", "CPD: caudal peduncle depth", "CPL: caudal peduncle length", "PreDL: pre-dorsal length", "DbL: dorsal base length", "HL/HW/HD: head length/width/depth", "SnL: snout length", "OL: ocular length"), ncol=1, byrow=TRUE))
## [,1]
## [1,] "Variable chart:"
## [2,] "D: dorsal ray count"
## [3,] "P1: left pectoral ray count"
## [4,] "P2.R: right pelvic rays"
## [5,] "P2.L: left pelvic rays"
## [6,] "A: anal rays"
## [7,] "P1.R: right pectoral ray count"
## [8,] "LLSC: lateral line scale count"
## [9,] "SALL: scales above lateral line"
## [10,] "SBLL: scales below lateral line"
## [11,] "SBDF: scales before dorsal fin"
## [12,] "TL: total length"
## [13,] "SL: standard length"
## [14,] "BD: body depth"
## [15,] "CPD: caudal peduncle depth"
## [16,] "CPL: caudal peduncle length"
## [17,] "PreDL: pre-dorsal length"
## [18,] "DbL: dorsal base length"
## [19,] "HL/HW/HD: head length/width/depth"
## [20,] "SnL: snout length"
## [21,] "OL: ocular length"
library(stringr)
raw1a <- subset(raw1, select = -c(17:18) ) #took out P2R and L since they don't vary at all
raw1a <- subset(raw1a, select=-c(34:49))#took out transformed values
raw1a <- raw1a[raw1a$SPP !="p.mexicana", ]
#PCA <- prcomp(raw1a[, 15:32], center=TRUE, scale. = TRUE) #includes all but pelvic traits, since they are the same in all species WILL SCALE/CENTER DATA ON MY OWN, LOGAN SAID IT IS NOT ALWAYS DOING IT FOR YOU EVEN WHEN YOU SPECIFY
z.scores <- scale(raw1a[, 15:33], center = TRUE, scale = TRUE)
raw1a <- cbind(raw1a, z.scores)
colnames(raw1a)[34:52] <- str_c( "z_", colnames(raw1a)[15:33] )
PCA <- prcomp(raw1a[, 34:52])
summary(PCA)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.0060 1.6513 1.13817 1.0063 0.98398 0.93294 0.9164
## Proportion of Variance 0.4756 0.1435 0.06818 0.0533 0.05096 0.04581 0.0442
## Cumulative Proportion 0.4756 0.6191 0.68729 0.7406 0.79154 0.83735 0.8815
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.77195 0.62502 0.53774 0.52991 0.47789 0.36370 0.32094
## Proportion of Variance 0.03136 0.02056 0.01522 0.01478 0.01202 0.00696 0.00542
## Cumulative Proportion 0.91292 0.93348 0.94870 0.96347 0.97549 0.98246 0.98788
## PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.26619 0.23140 0.22476 0.21408 0.09781
## Proportion of Variance 0.00373 0.00282 0.00266 0.00241 0.00050
## Cumulative Proportion 0.99161 0.99443 0.99708 0.99950 1.00000
(loadings <- PCA$rotation[, 1:5])
## PC1 PC2 PC3 PC4 PC5
## z_D -0.027611454 0.50696042 0.316846373 0.004268289 0.0451948594
## z_P1 -0.017044616 -0.40629486 0.407394590 -0.158694227 0.0181695357
## z_A -0.005020094 -0.07376887 0.540729328 -0.201314661 0.3293774740
## z_P1.R -0.032231791 -0.42866103 0.329523179 -0.206896881 0.0422406542
## z_LLSC -0.082677110 0.26941040 0.257920561 -0.064891319 -0.3459045035
## z_SALL -0.032462947 0.02769819 -0.421393968 -0.750271497 0.2668361871
## z_SBLL -0.060734321 0.10636635 0.092417424 -0.499673683 -0.7078204585
## z_SBDF -0.012155738 -0.41285736 -0.182065170 0.186269654 -0.3943917571
## z_SL -0.326253905 -0.06512695 -0.006222250 0.031902000 0.0070316251
## z_BD -0.317297365 0.08179424 0.003451140 0.001252865 0.0214277313
## z_CPD -0.322925536 -0.01610715 -0.010833330 0.011968403 0.0441387621
## z_CPL -0.313607090 -0.07890894 0.016948842 0.006014639 0.0151385027
## z_PreDL -0.303401525 -0.19301409 -0.072565076 0.004503104 -0.0118454128
## z_DbL -0.270183630 0.27157825 0.123412046 0.024270785 0.0573524651
## z_HL -0.284188821 -0.04940833 0.043083088 0.118345879 -0.0714330997
## z_HD -0.318116728 0.01608269 -0.036677461 0.088305954 -0.0003382527
## z_HW -0.317336350 -0.03203899 -0.091660876 -0.053461231 0.0365683929
## z_SnL -0.211371089 0.03449487 -0.119575011 -0.095782278 0.1651886401
## z_OL -0.289852342 0.01631099 0.005189576 0.065814318 -0.0102182298
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1"
myXlab <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")
sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2"
myXlab <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")
sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3"
myXlab <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")
VM_PCA <- varimax(PCA$rotation)
VM_PCA
## $loadings
##
## Loadings:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## z_D 1
## z_P1 -1
## z_A -1
## z_P1.R 1
## z_LLSC 1
## z_SALL -1
## z_SBLL -1
## z_SBDF -1
## z_SL
## z_BD -1
## z_CPD
## z_CPL -1
## z_PreDL
## z_DbL -1
## z_HL -1
## z_HD 1
## z_HW -1
## z_SnL 1
## z_OL -1
## PC17 PC18 PC19
## z_D
## z_P1
## z_A
## z_P1.R
## z_LLSC
## z_SALL
## z_SBLL
## z_SBDF
## z_SL -1
## z_BD
## z_CPD 1
## z_CPL
## z_PreDL 1
## z_DbL
## z_HL
## z_HD
## z_HW
## z_SnL
## z_OL
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474 0.526
## PC11 PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.579 0.632 0.684 0.737 0.789 0.842 0.895 0.947 1.000
##
## $rotmat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.317297343 -0.027611519 -0.032231799 0.032462948 0.060734321
## [2,] -0.081794240 0.506958321 -0.428661280 -0.027698191 -0.106366351
## [3,] -0.003451144 0.316845491 0.329523336 0.421393965 -0.092417423
## [4,] -0.001252866 0.004269206 -0.206896944 0.750271499 0.499673680
## [5,] -0.021427730 0.045192887 0.042240631 -0.266836186 0.707820462
## [6,] -0.033199352 0.084874126 0.339505368 -0.075738673 0.227645062
## [7,] 0.018734883 -0.014279117 -0.143234816 -0.316962005 0.408272272
## [8,] 0.081515121 0.001317346 0.094884864 0.172335948 0.006136149
## [9,] 0.086053471 -0.094888708 0.604811684 0.043285201 0.025703038
## [10,] 0.082298923 -0.596815749 -0.319002991 0.122286170 -0.066628666
## [11,] -0.064315969 -0.266497357 0.088260922 0.113679155 0.002350098
## [12,] -0.199000682 -0.168409940 -0.109978037 0.114505210 -0.015803049
## [13,] 0.146406946 0.368403646 -0.154706781 0.031430840 -0.008919745
## [14,] -0.177789590 0.160248758 0.054361842 0.057852593 0.016148244
## [15,] 0.549052642 0.031131568 -0.005311928 -0.031237105 -0.025111418
## [16,] -0.533151750 0.015246320 0.044593475 -0.039582409 -0.017507414
## [17,] -0.134563867 -0.017296410 0.026917693 -0.005143577 0.023484994
## [18,] 0.409407404 0.057485727 0.010122100 -0.023982002 -0.003065806
## [19,] 0.040863855 0.039849292 -0.008525031 0.015982366 0.005757158
## [,6] [,7] [,8] [,9] [,10]
## [1,] 0.005020095 -0.0826771102 -0.2113710896 0.01704460 0.012155597
## [2,] 0.073768892 0.2694103995 0.0344948667 0.40629465 0.412859881
## [3,] -0.540729342 0.2579205576 -0.1195750109 -0.40739442 0.182066771
## [4,] 0.201314670 -0.0648913172 -0.0957822783 0.15869412 -0.186269642
## [5,] -0.329377475 -0.3459045034 0.1651886405 -0.01816951 0.394391980
## [6,] 0.695717088 0.2430485445 -0.1426356251 -0.35501348 0.283357640
## [7,] -0.182845183 0.7086313178 -0.0856357684 -0.01425311 -0.392099171
## [8,] 0.083464939 0.1945768969 0.9254128628 -0.04126985 -0.053629177
## [9,] -0.047206773 0.1610503786 -0.0563727681 0.68628339 0.141114912
## [10,] -0.056664418 0.2573400377 0.0041307525 -0.12365605 0.559951002
## [11,] -0.060522778 0.1644153137 -0.0855462428 0.15143386 0.098564204
## [12,] -0.122877420 0.0573623191 -0.0629780054 0.04188635 -0.068117962
## [13,] 0.028601093 -0.0146958750 -0.0348630955 -0.01361745 0.095042256
## [14,] 0.005336033 0.0673024855 -0.0343885086 -0.02425436 0.090588582
## [15,] -0.017521042 -0.0160541318 0.0194603855 -0.03407002 -0.011571288
## [16,] -0.001583864 -0.0253270853 0.0330496288 0.02372218 -0.003245923
## [17,] 0.053905075 0.0235649576 0.0130757537 -0.02659535 0.057593745
## [18,] 0.020861507 0.0492294468 -0.0280211875 0.03169220 0.022801105
## [19,] 0.002904024 -0.0001237678 -0.0008865391 -0.00614236 -0.008102588
## [,11] [,12] [,13] [,14] [,15]
## [1,] 0.284188822 0.289852341 0.270183535 0.313607170 -0.3181167405
## [2,] 0.049408329 -0.016310991 -0.271578270 0.078908863 0.0160826901
## [3,] -0.043083089 -0.005189575 -0.123412038 -0.016948881 -0.0366774614
## [4,] -0.118345879 -0.065814318 -0.024270784 -0.006014645 0.0883059542
## [5,] 0.071433100 0.010218230 -0.057352461 -0.015138520 -0.0003382518
## [6,] 0.088786413 0.099107375 -0.104228251 -0.107166009 -0.0104924871
## [7,] -0.069409994 -0.009030853 0.074724932 -0.009548008 -0.0011071362
## [8,] 0.122862078 0.083007091 0.079770214 0.044315784 -0.0993405367
## [9,] -0.202033605 0.158816407 0.086072311 -0.101837019 -0.0023689054
## [10,] -0.256892530 0.056781186 0.183357953 -0.054206278 0.0550460305
## [11,] 0.707467701 -0.559934803 0.004098545 -0.035627345 -0.0852182700
## [12,] 0.417230185 0.733681963 -0.231635600 -0.144581746 0.0143546478
## [13,] 0.134710248 0.048549915 0.534905440 -0.461096407 -0.2354299703
## [14,] 0.054606670 0.061471281 0.506376680 0.522851666 0.3755756322
## [15,] 0.193184905 0.012736389 -0.131043940 -0.143779179 0.7057400097
## [16,] 0.136668417 0.061797571 0.243053603 -0.128765070 0.3987513789
## [17,] 0.033308278 0.005670456 -0.285212900 0.430588125 -0.1009323392
## [18,] 0.091496716 0.063363555 -0.022677103 0.292414130 0.0322133126
## [19,] 0.004864163 -0.005972848 -0.126399143 -0.227377683 0.0643044550
## [,16] [,17] [,18] [,19]
## [1,] 0.317336365 -0.303401523 -0.32292554 0.326253904
## [2,] 0.032038987 -0.193014092 -0.01610715 0.065126951
## [3,] 0.091660877 -0.072565076 -0.01083333 0.006222250
## [4,] 0.053461230 0.004503104 0.01196840 -0.031902000
## [5,] -0.036568394 -0.011845413 0.04413876 -0.007031625
## [6,] -0.023989842 -0.084992549 0.03355338 -0.004148751
## [7,] 0.034737593 0.014188829 -0.05385036 0.005024191
## [8,] 0.034667234 -0.052695539 -0.03905880 0.055973637
## [9,] 0.001859124 0.062053052 -0.06722902 -0.028614023
## [10,] 0.061378505 0.068042500 -0.01929206 0.004267214
## [11,] -0.090277088 0.013033321 0.04970381 -0.009348505
## [12,] -0.238749387 0.066919800 0.12735128 -0.114005731
## [13,] -0.023267472 0.397107360 -0.09823307 -0.252633948
## [14,] -0.471413982 0.086013679 0.05219274 0.107938043
## [15,] 0.014469532 -0.018473582 -0.32713631 -0.112845118
## [16,] 0.670025615 -0.011242857 0.02814313 0.048345250
## [17,] 0.177321016 0.638366089 -0.46203158 -0.205245694
## [18,] 0.326997687 0.277758985 0.72713571 -0.104772709
## [19,] -0.059005074 0.437852518 0.04165650 0.852915509
library(AMR)
library(ggplot2)
library(ggfortify)
(PCA$sdev ^ 2)
## [1] 9.036162582 2.726854756 1.295436311 1.012615035 0.968211759 0.870368929
## [7] 0.839852611 0.595899167 0.390647784 0.289165914 0.280806228 0.228376587
## [13] 0.132280993 0.103004154 0.070858659 0.053545286 0.050517140 0.045829712
## [19] 0.009566394
evplot <- function(ev)
{
# Broken stick model (MacArthur 1957)
n <- length(ev)
bsm <- data.frame(j=seq(1:n), p=0)
bsm$p[1] <- 1/n
for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
bsm$p <- 100*bsm$p/n
# Plot eigenvalues and % of variation for each axis
op <- par(mfrow=c(2,1))
barplot(ev, main="Eigenvalues", col="bisque", las=2)
abline(h=mean(ev), col="red")
legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE,
main="% variation", col=c("bisque",2), las=2)
legend("topright", c("% eigenvalue", "Broken stick model"),
pch=15, col=c("bisque",2), bty="n")
par(op)
}
ev <- PCA$sdev^2
evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot... not 100% confident I know what this means, but pretty sure PC1 is body size
plot1<- autoplot(PCA, data = raw1a, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot1
plot2<- autoplot(PCA, data = raw1a, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot2
plot3<- autoplot(PCA, data = raw1a, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot3
plot4<- autoplot(PCA, data = raw1a, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot4
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot5<- autoplot(PCA, x=2, y=3, data = raw1a, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot5
plot6<- autoplot(PCA, x=2, y=3, data = raw1a, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot6
plot7<- autoplot(PCA, x=2, y=3, data = raw1a, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot7
plot8<- autoplot(PCA, x=2, y=3, data = raw1a, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot8
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
I will try to compare the area of the density clusters for the PCA. I will attempt to do this use EMD (earth mover’s distance). From what I understand, this will basically tell me how much “work” is needed to transform one distribution into another, thus providing a metric of the overall difference in shape between two distributions.
To do this, I would need to be able to separate the loadings based on species, but I have no idea how to do that without running two separate PCAs, one for each group (which sorta defeats the point I think).
library(emdist) OR library(energy)
Now I will repeat analysis on a dataset that filters for fish above 32.5mm (this is the average for the range I found in literature, 28-37mm). This would confirm that all fish are likely adults, and remove any bias from juvenile proportions (ex. if juvenile head is proportionately larger than expected for body).
Data collection
library(dplyr)
rawF <- filter(raw1,SL>=32.5)
rawF1 <- rawF[-c(150), ] #this takes out the monster fish too
I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.
Conclusions: literally all of them are NOT normal… will log transform them and run parametric tests (t-test, F-test, ANOVA). Not sure what to do with the discrete variables…[Logan said to use non-parametric OR a glmer with poisson distribution]
shapiro.test(rawF$SL)
##
## Shapiro-Wilk normality test
##
## data: rawF$SL
## W = 0.88785, p-value = 9.323e-12
shapiro.test(rawF$BD)
##
## Shapiro-Wilk normality test
##
## data: rawF$BD
## W = 0.94379, p-value = 1.537e-07
shapiro.test(rawF$CPD)
##
## Shapiro-Wilk normality test
##
## data: rawF$CPD
## W = 0.9366, p-value = 3.381e-08
shapiro.test(rawF$CPL)
##
## Shapiro-Wilk normality test
##
## data: rawF$CPL
## W = 0.91011, p-value = 2.741e-10
shapiro.test(rawF$PreDL)
##
## Shapiro-Wilk normality test
##
## data: rawF$PreDL
## W = 0.94232, p-value = 1.118e-07
shapiro.test(rawF$DbL)
##
## Shapiro-Wilk normality test
##
## data: rawF$DbL
## W = 0.98676, p-value = 0.03793
shapiro.test(rawF$HL)
##
## Shapiro-Wilk normality test
##
## data: rawF$HL
## W = 0.87215, p-value = 1.115e-12
shapiro.test(rawF$HD)
##
## Shapiro-Wilk normality test
##
## data: rawF$HD
## W = 0.91737, p-value = 9.289e-10
shapiro.test(rawF$HW)
##
## Shapiro-Wilk normality test
##
## data: rawF$HW
## W = 0.92699, p-value = 5.211e-09
shapiro.test(rawF$SnL)
##
## Shapiro-Wilk normality test
##
## data: rawF$SnL
## W = 0.56341, p-value < 2.2e-16
shapiro.test(rawF$OL)
##
## Shapiro-Wilk normality test
##
## data: rawF$OL
## W = 0.9821, p-value = 0.006707
shapiro.test(rawF1$SL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$SL
## W = 0.90902, p-value = 2.458e-10
shapiro.test(rawF1$BD)
##
## Shapiro-Wilk normality test
##
## data: rawF1$BD
## W = 0.95312, p-value = 1.374e-06
shapiro.test(rawF1$CPD)
##
## Shapiro-Wilk normality test
##
## data: rawF1$CPD
## W = 0.95682, p-value = 3.411e-06
shapiro.test(rawF1$CPL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$CPL
## W = 0.92307, p-value = 2.703e-09
shapiro.test(rawF1$PreDL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$PreDL
## W = 0.95065, p-value = 7.638e-07
shapiro.test(rawF1$DbL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$DbL
## W = 0.98778, p-value = 0.05693
shapiro.test(rawF1$HL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$HL
## W = 0.88718, p-value = 9.126e-12
shapiro.test(rawF1$HD)
##
## Shapiro-Wilk normality test
##
## data: rawF1$HD
## W = 0.95302, p-value = 1.34e-06
shapiro.test(rawF1$HW)
##
## Shapiro-Wilk normality test
##
## data: rawF1$HW
## W = 0.95207, p-value = 1.068e-06
shapiro.test(rawF1$SnL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$SnL
## W = 0.55479, p-value < 2.2e-16
shapiro.test(rawF1$OL)
##
## Shapiro-Wilk normality test
##
## data: rawF1$OL
## W = 0.98583, p-value = 0.02724
hist(rawF$SL)
hist(rawF$BD)
hist(rawF$CPD)
hist(rawF$CPL)
hist(rawF$PreDL)
hist(rawF$DbL)
hist(rawF$HL)
hist(rawF$HD)
hist(rawF$HW)
hist(rawF$SnL)
hist(rawF$OL)
hist(rawF1$SL)
hist(rawF1$BD)
hist(rawF1$CPD)
hist(rawF1$CPL)
hist(rawF1$PreDL)
hist(rawF1$DbL)
hist(rawF1$HL)
hist(rawF1$HD)
hist(rawF1$HW)
hist(rawF1$SnL)
hist(rawF1$OL)
qqnorm(rawF$SL)
qqline(rawF$SL)
qqnorm(rawF$BD)
qqline(rawF$BD)
qqnorm(rawF$CPD)
qqline(rawF$CPD)
qqnorm(rawF$CPL)
qqline(rawF$CPL)
qqnorm(rawF$PreDL)
qqline(rawF$PreDL)
qqnorm(rawF$DbL)
qqline(rawF$DbL)
qqnorm(rawF$HL)
qqline(rawF$HL)
qqnorm(rawF$HD)
qqline(rawF$HD)
qqnorm(rawF$HW)
qqline(rawF$HW)
qqnorm(rawF$SnL)
qqline(rawF$SnL)
qqnorm(rawF$OL)
qqline(rawF$OL)
Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.
Log transformations did not work on ANY of them. Will try square/cubed root. Square didn’t work…
rawF[paste0(names(rawF), '_cb')] <- '^'(rawF[, 25:35], 1/3)
#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.
rawF <- subset(rawF, select = -c(37:50) )
#rawF <- subset(rawF, select = -c(59:94) )
#double checking normality with a SW test and QQ plot
shapiro.test(rawF$SL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$SL_cb
## W = 0.97157, p-value = 0.0001981
shapiro.test(rawF$BD_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$BD_cb
## W = 0.93998, p-value = 6.798e-08
shapiro.test(rawF$CPD_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$CPD_cb
## W = 0.96548, p-value = 3.283e-05
shapiro.test(rawF$CPL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$CPL_cb
## W = 0.98957, p-value = 0.1103
shapiro.test(rawF$PreDL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$PreDL_cb
## W = 0.77086, p-value < 2.2e-16
shapiro.test(rawF$DbL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$DbL_cb
## W = 0.95806, p-value = 4.462e-06
shapiro.test(rawF$HL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$HL_cb
## W = 0.96324, p-value = 1.76e-05
shapiro.test(rawF$HD_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$HD_cb
## W = 0.80411, p-value = 5.928e-16
shapiro.test(rawF$HW_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$HW_cb
## W = 0.98405, p-value = 0.01368
shapiro.test(rawF$SnL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$SnL_cb
## W = 0.91915, p-value = 1.265e-09
shapiro.test(rawF$OL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF$OL_cb
## W = 0.97715, p-value = 0.001192
qqnorm(rawF$SL_cb)
qqline(rawF$SL_cb)
qqnorm(rawF$BD_cb)
qqline(rawF$BD_cb)
qqnorm(rawF$CPD_cb)
qqline(rawF$CPD_cb)
qqnorm(rawF$CPL_cb)
qqline(rawF$CPL_cb)
qqnorm(rawF$PreDL_cb)
qqline(rawF$PreDL_cb)
qqnorm(rawF$DbL_cb)
qqline(rawF$DbL_cb)
qqnorm(rawF$HL_cb)
qqline(rawF$HL_cb)
qqnorm(rawF$HD_cb)
qqline(rawF$HD_cb)
qqnorm(rawF$HW_cb)
qqline(rawF$HW_cb)
qqnorm(rawF$SnL_cb)
qqline(rawF$SnL_cb)
qqnorm(rawF$OL_cb)
qqline(rawF$OL_cb)
rawF1[paste0(names(rawF1), '_cb')] <- '^'(rawF1[, 25:35], 1/3)
#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.
rawF1 <- subset(rawF1, select = -c(37:50) )
#rawF <- subset(rawF, select = -c(59:94) )
#double checking normality with a SW test and QQ plot
shapiro.test(rawF1$SL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$SL_cb
## W = 0.98054, p-value = 0.003949
shapiro.test(rawF1$BD_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$BD_cb
## W = 0.94599, p-value = 2.634e-07
shapiro.test(rawF1$CPD_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$CPD_cb
## W = 0.96782, p-value = 6.7e-05
shapiro.test(rawF1$CPL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$CPL_cb
## W = 0.98884, p-value = 0.08506
shapiro.test(rawF1$PreDL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$PreDL_cb
## W = 0.76465, p-value < 2.2e-16
shapiro.test(rawF1$DbL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$DbL_cb
## W = 0.97784, p-value = 0.001557
shapiro.test(rawF1$HL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$HL_cb
## W = 0.97571, p-value = 0.000764
shapiro.test(rawF1$HD_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$HD_cb
## W = 0.79858, p-value = 3.838e-16
shapiro.test(rawF1$HW_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$HW_cb
## W = 0.98473, p-value = 0.01803
shapiro.test(rawF1$SnL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$SnL_cb
## W = 0.9311, p-value = 1.208e-08
shapiro.test(rawF1$OL_cb)
##
## Shapiro-Wilk normality test
##
## data: rawF1$OL_cb
## W = 0.98222, p-value = 0.007199
qqnorm(rawF1$SL_cb)
qqline(rawF1$SL_cb)
qqnorm(rawF1$BD_cb)
qqline(rawF1$BD_cb)
qqnorm(rawF1$CPD_cb)
qqline(rawF1$CPD_cb)
qqnorm(rawF1$CPL_cb)
qqline(rawF1$CPL_cb)
qqnorm(rawF1$PreDL_cb)
qqline(rawF1$PreDL_cb)
qqnorm(rawF1$DbL_cb)
qqline(rawF1$DbL_cb)
qqnorm(rawF1$HL_cb)
qqline(rawF1$HL_cb)
qqnorm(rawF1$HD_cb)
qqline(rawF1$HD_cb)
qqnorm(rawF1$HW_cb)
qqline(rawF1$HW_cb)
qqnorm(rawF1$SnL_cb)
qqline(rawF1$SnL_cb)
qqnorm(rawF1$OL_cb)
qqline(rawF1$OL_cb)
Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.
Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales above and below lateral line (except in mexicana), scales before dorsal fin and fluctuating asymmetry; all other traits influenced by body size.
library(ggplot2)
library(ggpubr)
lat.F <- rawF[rawF$SPP == "p.latipinna",]
form.F <- rawF[rawF$SPP == "p.formosa",]
mex.F <- rawF[rawF$SPP == "p.mexicana",]
##### LAT #####
F.reg.lat.D <- lm(lat.F$D ~ lat.F$SL)
F.sd.lat.D <- rstandard(F.reg.lat.D)
F.reg.lat.D.plot <- ggplot(lat.F, aes(x = SL, y = D)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.P1 <- lm(lat.F$P1 ~ lat.F$SL)
F.sd.lat.P1 <- rstandard(F.reg.lat.P1)
F.reg.lat.P1.plot <- ggplot(lat.F, aes(x = SL, y = P1)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.P2.L <- lm(lat.F$P2.L ~ lat.F$SL)
F.sd.lat.P2.L <- rstandard(F.reg.lat.P2.L)
F.reg.lat.P2.L.plot <- ggplot(lat.F, aes(x = SL, y = P2.L)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.P2.R <- lm(lat.F$P2.R ~ lat.F$SL)
F.sd.lat.P2.R <- rstandard(F.reg.lat.P2.R)
F.reg.lat.P2.R.plot <- ggplot(lat.F, aes(x = SL, y = P2.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.A <- lm(lat.F$A ~ lat.F$SL)
F.sd.lat.A <- rstandard(F.reg.lat.A)
F.reg.lat.A.plot <- ggplot(lat.F, aes(x = SL, y = A)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.P1.R <- lm(lat.F$P1.R ~ lat.F$SL)
F.sd.lat.P1.R <- rstandard(F.reg.lat.P1.R)
F.reg.lat.P1.R.plot <- ggplot(lat.F, aes(x = SL, y = P1.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.LLSC <- lm(lat.F$LLSC ~ lat.F$SL)
F.sd.lat.LLSC <- rstandard(F.reg.lat.LLSC)
F.reg.lat.LLSC.plot <- ggplot(lat.F, aes(x = SL, y = LLSC)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.SALL <- lm(lat.F$SALL ~ lat.F$SL)
F.sd.lat.SALL <- rstandard(F.reg.lat.SALL)
F.reg.lat.SALL.plot <- ggplot(lat.F, aes(x = SL, y = SALL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.SBLL <- lm(lat.F$SBLL ~ lat.F$SL)
F.sd.lat.SBLL <- rstandard(F.reg.lat.SBLL)
F.reg.lat.SBLL.plot <- ggplot(lat.F, aes(x = SL, y = SBLL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.SBDF <- lm(lat.F$SBDF ~ lat.F$SL)
F.sd.lat.SBDF <- rstandard(F.reg.lat.SBDF)
F.reg.lat.SBDF.plot <- ggplot(lat.F, aes(x = SL, y = SBDF)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.BD <- lm(lat.F$BD_cb ~ lat.F$SL_cb)
F.sd.lat.BD <- rstandard(F.reg.lat.BD)
F.reg.lat.BD.plot <- ggplot(lat.F, aes(x = SL_cb, y = BD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.CPD <- lm(lat.F$CPD_cb ~ lat.F$SL_cb)
F.sd.lat.CPD <- rstandard(F.reg.lat.CPD)
F.reg.lat.CPD.plot <- ggplot(lat.F, aes(x = SL_cb, y = CPD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.CPL <- lm(lat.F$CPL_cb ~ lat.F$SL_cb)
F.sd.lat.CPL <- rstandard(F.reg.lat.CPL)
F.reg.lat.CPL.plot <- ggplot(lat.F, aes(x = SL_cb, y = CPL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.PreDL <- lm(lat.F$PreDL_cb ~ lat.F$SL_cb)
F.sd.lat.PreDL <- rstandard(F.reg.lat.PreDL)
F.reg.lat.PreDL.plot <- ggplot(lat.F, aes(x = SL_cb, y = PreDL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.DbL <- lm(lat.F$DbL_cb ~ lat.F$SL_cb)
F.sd.lat.DbL <- rstandard(F.reg.lat.DbL)
F.reg.lat.DbL.plot <- ggplot(lat.F, aes(x = SL_cb, y = DbL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.HL <- lm(lat.F$HL_cb ~ lat.F$SL_cb)
F.sd.lat.HL <- rstandard(F.reg.lat.HL)
F.reg.lat.HL.plot <- ggplot(lat.F, aes(x = SL_cb, y = HL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.HD <- lm(lat.F$HD_cb ~ lat.F$SL_cb)
F.sd.lat.HD <- rstandard(F.reg.lat.HD)
F.reg.lat.HD.plot <- ggplot(lat.F, aes(x = SL_cb, y = HD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.HW <- lm(lat.F$HW_cb ~ lat.F$SL_cb)
F.sd.lat.HW <- rstandard(F.reg.lat.HW)
F.reg.lat.HW.plot <- ggplot(lat.F, aes(x = SL_cb, y = HW_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.SnL <- lm(lat.F$SnL_cb ~ lat.F$SL_cb)
F.sd.lat.SnL <- rstandard(F.reg.lat.SnL)
F.reg.lat.SnL.plot <- ggplot(lat.F, aes(x = SL_cb, y = SnL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.OL <- lm(lat.F$OL_cb ~ lat.F$SL_cb)
F.sd.lat.OL <- rstandard(F.reg.lat.OL)
F.reg.lat.OL.plot <- ggplot(lat.F, aes(x = SL_cb, y = OL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.lat.FLA <- lm(lat.F$FLA ~ lat.F$SL)
F.sd.lat.FLA <- rstandard(F.reg.lat.FLA)
F.reg.lat.FLA.plot <- ggplot(lat.F, aes(x = SL, y = FLA)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'
##### FORM #####
F.reg.form.D <- lm(form.F$D ~ form.F$SL)
F.sd.form.D <- rstandard(F.reg.form.D)
F.reg.form.D.plot <- ggplot(form.F, aes(x =SL, y = D)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.P1 <- lm(form.F$P1 ~ form.F$SL)
F.sd.form.P1 <- rstandard(F.reg.form.P1)
F.reg.form.P1.plot <- ggplot(form.F, aes(x = SL, y = P1)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.P2.L <- lm(form.F$P2.L ~ form.F$SL)
F.sd.form.P2.L <- rstandard(F.reg.form.P2.L)
F.reg.form.P2.L.plot <- ggplot(form.F, aes(x = SL, y = P2.L)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.P2.R <- lm(form.F$P2.R ~ form.F$SL)
F.sd.form.P2.R <- rstandard(F.reg.form.P2.R)
F.reg.form.P2.R.plot <- ggplot(form.F, aes(x = SL, y = P2.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.A <- lm(form.F$A ~ form.F$SL)
F.sd.form.A <- rstandard(F.reg.form.A)
F.reg.form.A.plot <- ggplot(form.F, aes(x = SL, y = A)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.P1.R <- lm(form.F$P1.R ~ form.F$SL)
F.sd.form.P1.R <- rstandard(F.reg.form.P1.R)
F.reg.form.P1.R.plot <- ggplot(form.F, aes(x = SL, y = P1.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.LLSC <- lm(form.F$LLSC ~ form.F$SL)
F.sd.form.LLSC <- rstandard(F.reg.form.LLSC)
F.reg.form.LLSC.plot <- ggplot(form.F, aes(x = SL, y = LLSC)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.SALL <- lm(form.F$SALL ~ form.F$SL)
F.sd.form.SALL <- rstandard(F.reg.form.SALL)
F.reg.form.SALL.plot <- ggplot(form.F, aes(x = SL, y = SALL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.SBLL <- lm(form.F$SBLL ~ form.F$SL)
F.sd.form.SBLL <- rstandard(F.reg.form.SBLL)
F.reg.form.SBLL.plot <- ggplot(form.F, aes(x = SL, y = SBLL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.SBDF <- lm(form.F$SBDF ~ form.F$SL)
F.sd.form.SBDF <- rstandard(F.reg.form.SBDF)
F.reg.form.SBDF.plot <- ggplot(form.F, aes(x = SL, y = SBDF)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.BD <- lm(form.F$BD_cb ~ form.F$SL_cb)
F.sd.form.BD <- rstandard(F.reg.form.BD)
F.reg.form.BD.plot <- ggplot(form.F, aes(x = SL_cb, y = BD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.CPD <- lm(form.F$CPD_cb ~ form.F$SL_cb)
F.sd.form.CPD <- rstandard(F.reg.form.CPD)
F.reg.form.CPD.plot <- ggplot(form.F, aes(x = SL_cb, y = CPD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.CPL <- lm(form.F$CPL_cb ~ form.F$SL_cb)
F.sd.form.CPL <- rstandard(F.reg.form.CPL)
F.reg.form.CPL.plot <- ggplot(form.F, aes(x = SL_cb, y = CPL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.PreDL <- lm(form.F$PreDL_cb ~ form.F$SL_cb)
F.sd.form.PreDL <- rstandard(F.reg.form.PreDL)
F.reg.form.PreDL.plot <- ggplot(form.F, aes(x = SL_cb, y = PreDL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.DbL <- lm(form.F$DbL_cb ~ form.F$SL_cb)
F.sd.form.DbL <- rstandard(F.reg.form.DbL)
F.reg.form.DbL.plot <- ggplot(form.F, aes(x = SL_cb, y = DbL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.HL <- lm(form.F$HL_cb ~ form.F$SL_cb)
F.sd.form.HL <- rstandard(F.reg.form.HL)
F.reg.form.HL.plot <- ggplot(form.F, aes(x = SL_cb, y = HL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.HD <- lm(form.F$HD_cb ~ form.F$SL_cb)
F.sd.form.HD <- rstandard(F.reg.form.HD)
F.reg.form.HD.plot <- ggplot(form.F, aes(x = SL_cb, y = HD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.HW <- lm(form.F$HW_cb ~ form.F$SL_cb)
F.sd.form.HW <- rstandard(F.reg.form.HW)
F.reg.form.HW.plot <- ggplot(form.F, aes(x = SL_cb, y = HW_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.SnL <- lm(form.F$SnL_cb ~ form.F$SL_cb)
F.sd.form.SnL <- rstandard(F.reg.form.SnL)
F.reg.form.SnL.plot <- ggplot(form.F, aes(x = SL_cb, y = SnL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.OL <- lm(form.F$OL_cb ~ form.F$SL_cb)
F.sd.form.OL <- rstandard(F.reg.form.OL)
F.reg.form.OL.plot <- ggplot(form.F, aes(x = SL_cb, y = OL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.form.FLA <- lm(form.F$FLA ~ form.F$SL)
F.sd.form.FLA <- rstandard(F.reg.form.FLA)
F.reg.form.FLA.plot <- ggplot(form.F, aes(x = SL, y = FLA)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'
##### MEX #####
F.reg.mex.D <- lm(mex.F$D ~ mex.F$SL)
F.sd.mex.D <- rstandard(F.reg.mex.D)
F.reg.mex.D.plot <- ggplot(mex.F, aes(x = SL, y = D)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.P1 <- lm(mex.F$P1 ~ mex.F$SL)
F.sd.mex.P1 <- rstandard(F.reg.mex.P1)
F.reg.mex.P1.plot <- ggplot(mex.F, aes(x = SL, y = P1)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.P2.L <- lm(mex.F$P2.L ~ mex.F$SL)
F.sd.mex.P2.L <- rstandard(F.reg.mex.P2.L)
F.reg.mex.P2.L.plot <- ggplot(mex.F, aes(x = SL, y = P2.L)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.P2.R <- lm(mex.F$P2.R ~ mex.F$SL)
F.sd.mex.P2.R <- rstandard(F.reg.mex.P2.R)
F.reg.mex.P2.R.plot <- ggplot(mex.F, aes(x = SL, y = P2.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.A <- lm(mex.F$A ~ mex.F$SL)
F.sd.mex.A <- rstandard(F.reg.mex.A)
F.reg.mex.A.plot <- ggplot(mex.F, aes(x = SL, y = A)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.P1.R <- lm(mex.F$P1.R ~ mex.F$SL)
F.sd.mex.P1.R <- rstandard(F.reg.mex.P1.R)
F.reg.mex.P1.R.plot <- ggplot(mex.F, aes(x = SL, y = P1.R)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.LLSC <- lm(mex.F$LLSC ~ mex.F$SL)
F.sd.mex.LLSC <- rstandard(F.reg.mex.LLSC)
F.reg.mex.LLSC.plot <- ggplot(mex.F, aes(x = SL, y = LLSC)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.SALL <- lm(mex.F$SALL ~ mex.F$SL)
F.sd.mex.SALL <- rstandard(F.reg.mex.SALL)
F.reg.mex.SALL.plot <- ggplot(mex.F, aes(x = SL, y = SALL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.SBLL <- lm(mex.F$SBLL ~ mex.F$SL)
F.sd.mex.SBLL <- rstandard(F.reg.mex.SBLL)
F.reg.mex.SBLL.plot <- ggplot(mex.F, aes(x = SL, y = SBLL)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.SBDF <- lm(mex.F$SBDF ~ mex.F$SL)
F.sd.mex.SBDF <- rstandard(F.reg.mex.SBDF)
F.reg.mex.SBDF.plot <- ggplot(mex.F, aes(x = SL, y = SBDF)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.BD <- lm(mex.F$BD_cb ~ mex.F$SL_cb)
F.sd.mex.BD <- rstandard(F.reg.mex.BD)
F.reg.mex.BD.plot <- ggplot(mex.F, aes(x = SL_cb, y = BD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.CPD <- lm(mex.F$CPD_cb ~ mex.F$SL_cb)
F.sd.mex.CPD <- rstandard(F.reg.mex.CPD)
F.reg.mex.CPD.plot <- ggplot(mex.F, aes(x = SL_cb, y = CPD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.CPL <- lm(mex.F$CPL_cb ~ mex.F$SL_cb)
F.sd.mex.CPL <- rstandard(F.reg.mex.CPL)
F.reg.mex.CPL.plot <- ggplot(mex.F, aes(x = SL_cb, y = CPL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.PreDL <- lm(mex.F$PreDL_cb ~ mex.F$SL_cb)
F.sd.mex.PreDL <- rstandard(F.reg.mex.PreDL)
F.reg.mex.PreDL.plot <- ggplot(mex.F, aes(x = SL_cb, y = PreDL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.DbL <- lm(mex.F$DbL_cb ~ mex.F$SL_cb)
F.sd.mex.DbL <- rstandard(F.reg.mex.DbL)
F.reg.mex.DbL.plot <- ggplot(mex.F, aes(x = SL_cb, y = DbL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.HL <- lm(mex.F$HL_cb ~ mex.F$SL_cb)
F.sd.mex.HL <- rstandard(F.reg.mex.HL)
F.reg.mex.HL.plot <- ggplot(mex.F, aes(x = SL_cb, y = HL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.HD <- lm(mex.F$HD_cb ~ mex.F$SL_cb)
F.sd.mex.HD <- rstandard(F.reg.mex.HD)
F.reg.mex.HD.plot <- ggplot(mex.F, aes(x = SL_cb, y = HD_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.HW <- lm(mex.F$HW_cb ~ mex.F$SL_cb)
F.sd.mex.HW <- rstandard(F.reg.mex.HW)
F.reg.mex.HW.plot <- ggplot(mex.F, aes(x = SL_cb, y = HW_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.SnL <- lm(mex.F$SnL_cb ~ mex.F$SL_cb)
F.sd.mex.SnL <- rstandard(F.reg.mex.SnL)
F.reg.mex.SnL.plot <- ggplot(mex.F, aes(x = SL_cb, y = SnL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.OL <- lm(mex.F$OL_cb ~ mex.F$SL_cb)
F.sd.mex.OL <- rstandard(F.reg.mex.OL)
F.reg.mex.OL.plot <- ggplot(mex.F, aes(x = SL_cb, y = OL_cb)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'
F.reg.mex.FLA <- lm(mex.F$FLA ~ mex.F$SL)
F.sd.mex.FLA <- rstandard(F.reg.mex.FLA)
F.reg.mex.FLA.plot <- ggplot(mex.F, aes(x = SL, y = FLA)) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
stat_cor(label.y = 10)
F.reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'
STEP TWO: get residuals for each individual for traits that were influenced by body size
STEP THREE: convert residuals to absolute value
##### LAT #####
F.abs.lat.D <- abs(F.res.lat.D)
mean(F.abs.lat.D)
## [1] 0.5690529
F.abs.lat.P1 <- abs(F.res.lat.P1)
mean(F.abs.lat.P1)
## [1] 0.5432078
F.abs.lat.P1.R <- abs(F.res.lat.P1.R)
mean(F.abs.lat.P1.R)
## [1] 0.5782063
F.abs.lat.LLSC <- abs(F.res.lat.LLSC)
mean(F.abs.lat.LLSC)
## [1] 0.7316606
F.abs.lat.SBLL <- abs(F.res.lat.SBLL)
mean(F.abs.lat.SBLL)
## [1] 0.274228
F.abs.lat.BD <- abs(F.res.lat.BD)
mean(F.abs.lat.BD)
## [1] 0.04259109
F.abs.lat.CPD <- abs(F.res.lat.CPD)
mean(F.abs.lat.CPD)
## [1] 0.0430087
F.abs.lat.CPL <- abs(F.res.lat.CPL)
mean(F.abs.lat.CPL)
## [1] 0.04830372
F.abs.lat.PreDL <- abs(F.res.lat.PreDL)
mean(F.abs.lat.PreDL)
## [1] 0.04605678
F.abs.lat.DbL <- abs(F.res.lat.DbL)
mean(F.abs.lat.DbL)
## [1] 0.03464918
F.abs.lat.HL <- abs(F.res.lat.HL)
mean(F.abs.lat.HL)
## [1] 0.03556902
F.abs.lat.HD <- abs(F.res.lat.HD)
mean(F.abs.lat.HD)
## [1] 0.05432265
F.abs.lat.HW <- abs(F.res.lat.HW)
mean(F.abs.lat.HW)
## [1] 0.03929436
F.abs.lat.SnL <- abs(F.res.lat.SnL)
mean(F.abs.lat.SnL)
## [1] 0.04682771
F.abs.lat.OL <- abs(F.res.lat.OL)
mean(F.abs.lat.OL)
## [1] 0.04748132
##### FORM #####
F.abs.form.D <- abs(F.res.form.D)
mean(F.abs.form.D)
## [1] 0.5057929
F.abs.form.P1 <- abs(F.res.form.P1)
mean(F.abs.form.P1)
## [1] 0.5002061
F.abs.form.P1.R <- abs(F.res.form.P1.R)
mean(F.abs.form.P1.R)
## [1] 0.4644069
F.abs.form.LLSC <- abs(F.res.form.LLSC)
mean(F.abs.form.LLSC)
## [1] 0.8024266
F.abs.form.SBLL <- abs(F.res.form.SBLL)
mean(F.abs.form.SBLL)
## [1] 0.3032569
F.abs.form.BD <- abs(F.res.form.BD)
mean(F.abs.form.BD)
## [1] 0.04383959
F.abs.form.CPD <- abs(F.res.form.CPD)
mean(F.abs.form.CPD)
## [1] 0.04644898
F.abs.form.CPL <- abs(F.res.form.CPL)
mean(F.abs.form.CPL)
## [1] 0.0430726
F.abs.form.PreDL <- abs(F.res.form.PreDL)
mean(F.abs.form.PreDL)
## [1] 0.06921356
F.abs.form.DbL <- abs(F.res.form.DbL)
mean(F.abs.form.DbL)
## [1] 0.03908698
F.abs.form.HL <- abs(F.res.form.HL)
mean(F.abs.form.HL)
## [1] 0.03477795
F.abs.form.HD <- abs(F.res.form.HD)
mean(F.abs.form.HD)
## [1] 0.0465892
F.abs.form.HW <- abs(F.res.form.HW)
mean(F.abs.form.HW)
## [1] 0.03223909
F.abs.form.SnL <- abs(F.res.form.SnL)
mean(F.abs.form.SnL)
## [1] 0.04662763
F.abs.form.OL <- abs(F.res.form.OL)
mean(F.abs.form.OL)
## [1] 0.04214568
##### MEX #####
F.abs.mex.D <- abs(F.res.mex.D)
mean(F.abs.mex.D)
## [1] 0.09391999
F.abs.mex.P1 <- abs(F.res.mex.P1)
mean(F.abs.mex.P1)
## [1] 0.6592812
F.abs.mex.P1.R <- abs(F.res.mex.P1.R)
mean(F.abs.mex.P1.R)
## [1] 0.5711081
F.abs.mex.LLSC <- abs(F.res.mex.LLSC)
mean(F.abs.mex.LLSC)
## [1] 0.4403132
F.abs.mex.SBLL <- abs(F.res.mex.SBLL)
mean(F.abs.mex.SBLL)
## [1] 0.2860922
F.abs.mex.BD <- abs(F.res.mex.BD)
mean(F.abs.mex.BD)
## [1] 0.04110865
F.abs.mex.CPD <- abs(F.res.mex.CPD)
mean(F.abs.mex.CPD)
## [1] 0.06073298
F.abs.mex.CPL <- abs(F.res.mex.CPL)
mean(F.abs.mex.CPL)
## [1] 0.05225016
F.abs.mex.PreDL <- abs(F.res.mex.PreDL)
mean(F.abs.mex.PreDL)
## [1] 0.03155218
F.abs.mex.DbL <- abs(F.res.mex.DbL)
mean(F.abs.mex.DbL)
## [1] 0.04592219
F.abs.mex.HL <- abs(F.res.mex.HL)
mean(F.abs.mex.HL)
## [1] 0.03410048
F.abs.mex.HD <- abs(F.res.mex.HD)
mean(F.abs.mex.HD)
## [1] 0.04861302
F.abs.mex.HW <- abs(F.res.mex.HW)
mean(F.abs.mex.HW)
## [1] 0.0246207
F.abs.mex.SnL <- abs(F.res.mex.SnL)
mean(F.abs.mex.SnL)
## [1] 0.05065907
F.abs.mex.OL <- abs(F.res.mex.OL)
mean(F.abs.mex.OL)
## [1] 0.07302009
#let's get this into the raw1 data set so that we can plot this more easily
F.abs.res.D <- c(F.abs.lat.D, F.abs.form.D, F.abs.mex.D)
F.abs.res.P1 <- c(F.abs.lat.P1, F.abs.form.P1, F.abs.mex.P1)
F.abs.res.P1.R <- c(F.abs.lat.P1.R, F.abs.form.P1.R, F.abs.mex.P1.R)
F.abs.res.LLSC<- c(F.abs.lat.LLSC, F.abs.form.LLSC, F.abs.mex.LLSC)
F.abs.res.SBLL<- c(F.abs.lat.SBLL, F.abs.form.SBLL, F.abs.mex.SBLL)
F.abs.res.BD<- c(F.abs.lat.BD, F.abs.form.BD, F.abs.mex.BD)
F.abs.res.CPD<- c(F.abs.lat.CPD, F.abs.form.CPD, F.abs.mex.CPD)
F.abs.res.CPL<- c(F.abs.lat.CPL, F.abs.form.CPL, F.abs.mex.CPL)
F.abs.res.PreDL <- c(F.abs.lat.PreDL, F.abs.form.PreDL, F.abs.mex.PreDL)
F.abs.res.DbL <- c(F.abs.lat.DbL, F.abs.form.DbL, F.abs.mex.DbL)
F.abs.res.HL<- c(F.abs.lat.HL, F.abs.form.HL, F.abs.mex.HL)
F.abs.res.HD<- c(F.abs.lat.HD, F.abs.form.HD, F.abs.mex.HD)
F.abs.res.HW <- c(F.abs.lat.HW, F.abs.form.HW, F.abs.mex.HW)
F.abs.res.SnL <- c(F.abs.lat.SnL, F.abs.form.SnL, F.abs.mex.SnL)
F.abs.res.OL <- c(F.abs.lat.OL, F.abs.form.OL, F.abs.mex.OL)
rawF2 <- cbind(rawF, F.abs.res.D, F.abs.res.P1, F.abs.res.P1.R, F.abs.res.LLSC, F.abs.res.SBLL, F.abs.res.BD, F.abs.res.CPD, F.abs.res.CPL, F.abs.res.PreDL, F.abs.res.DbL, F.abs.res.HL, F.abs.res.HD, F.abs.res.HW, F.abs.res.SnL, F.abs.res.OL)
library(ggbeeswarm)
library(dplyr)
ggplot(rawF2, aes(SPP, F.abs.res.D)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
scatter_violin <- ggplot(data=rawF2, aes(x=SPP, y=F.abs.res.D)) +
geom_violin(trim = FALSE) +
stat_summary(
fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black"
)
print(scatter_violin)
scatter_violin1 <- ggplot(data=rawF2, aes(x=SPP, y=F.abs.res.D)) +
geom_violin(trim = FALSE) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)
print(scatter_violin1)
ggplot(rawF2, aes(SPP, F.abs.res.P1)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.P1.R)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.LLSC)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.SBLL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.BD)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.CPD)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.CPL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.PreDL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.DbL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.HL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.HD)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.HW)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.SnL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
ggplot(rawF2, aes(SPP, F.abs.res.OL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).
F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.
Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().
- will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.
##
## Welch Two Sample t-test
##
## data: F.abs.lat.BD and F.abs.form.BD
## t = -0.22141, df = 157.72, p-value = 0.8251
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.012386061 0.009889057
## sample estimates:
## mean of x mean of y
## 0.04259109 0.04383959
##
## Welch Two Sample t-test
##
## data: F.abs.lat.CPD and F.abs.form.CPD
## t = -0.71041, df = 194.99, p-value = 0.4783
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01299104 0.00611049
## sample estimates:
## mean of x mean of y
## 0.04300870 0.04644898
##
## Welch Two Sample t-test
##
## data: F.abs.lat.CPL and F.abs.form.CPL
## t = 0.96339, df = 185.53, p-value = 0.3366
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.005481189 0.015943440
## sample estimates:
## mean of x mean of y
## 0.04830372 0.04307260
##
## Welch Two Sample t-test
##
## data: F.abs.lat.PreDL and F.abs.form.PreDL
## t = -2.0243, df = 142.34, p-value = 0.04481
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0457695696 -0.0005439898
## sample estimates:
## mean of x mean of y
## 0.04605678 0.06921356
##
## Welch Two Sample t-test
##
## data: F.abs.lat.DbL and F.abs.form.DbL
## t = -1.0744, df = 193.77, p-value = 0.284
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.012584376 0.003708775
## sample estimates:
## mean of x mean of y
## 0.03464918 0.03908698
##
## Welch Two Sample t-test
##
## data: F.abs.lat.HL and F.abs.form.HL
## t = 0.19004, df = 194.71, p-value = 0.8495
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007418814 0.009000959
## sample estimates:
## mean of x mean of y
## 0.03556902 0.03477795
##
## Welch Two Sample t-test
##
## data: F.abs.lat.HD and F.abs.form.HD
## t = 0.76056, df = 114.7, p-value = 0.4485
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01240816 0.02787505
## sample estimates:
## mean of x mean of y
## 0.05432265 0.04658920
##
## Welch Two Sample t-test
##
## data: F.abs.lat.HW and F.abs.form.HW
## t = 1.7227, df = 194.29, p-value = 0.08653
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.00102204 0.01513258
## sample estimates:
## mean of x mean of y
## 0.03929436 0.03223909
##
## Welch Two Sample t-test
##
## data: F.abs.lat.SnL and F.abs.form.SnL
## t = 0.036021, df = 181.24, p-value = 0.9713
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01075997 0.01116014
## sample estimates:
## mean of x mean of y
## 0.04682771 0.04662763
##
## Welch Two Sample t-test
##
## data: F.abs.lat.OL and F.abs.form.OL
## t = 1.1334, df = 193.33, p-value = 0.2584
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.003949233 0.014620512
## sample estimates:
## mean of x mean of y
## 0.04748132 0.04214568
Gonna try F-tests for funsies. Would be interesting if both are varying, but in different ways (amazon with identical variance around best fit, whereas lat with more variation around best fit for example).
##
## F test to compare two variances
##
## data: F.abs.lat.BD and F.abs.form.BD
## F = 1.9493, num df = 89, denom df = 106, p-value = 0.001031
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.310124 2.920837
## sample estimates:
## ratio of variances
## 1.949263
##
## F test to compare two variances
##
## data: F.abs.lat.CPD and F.abs.form.CPD
## F = 0.69807, num df = 89, denom df = 106, p-value = 0.08123
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4691832 1.0460139
## sample estimates:
## ratio of variances
## 0.6980727
##
## F test to compare two variances
##
## data: F.abs.lat.CPL and F.abs.form.CPL
## F = 1.1102, num df = 89, denom df = 106, p-value = 0.6034
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7461849 1.6635714
## sample estimates:
## ratio of variances
## 1.110209
##
## F test to compare two variances
##
## data: F.abs.lat.PreDL and F.abs.form.PreDL
## F = 0.15246, num df = 89, denom df = 106, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1024735 0.2284582
## sample estimates:
## ratio of variances
## 0.1524649
##
## F test to compare two variances
##
## data: F.abs.lat.DbL and F.abs.form.DbL
## F = 0.82797, num df = 89, denom df = 106, p-value = 0.3592
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5564879 1.2406541
## sample estimates:
## ratio of variances
## 0.8279687
##
## F test to compare two variances
##
## data: F.abs.lat.HL and F.abs.form.HL
## F = 0.65345, num df = 89, denom df = 106, p-value = 0.0393
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4391934 0.9791536
## sample estimates:
## ratio of variances
## 0.6534525
##
## F test to compare two variances
##
## data: F.abs.lat.HD and F.abs.form.HD
## F = 5.7907, num df = 89, denom df = 106, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 3.891984 8.676929
## sample estimates:
## ratio of variances
## 5.790676
##
## F test to compare two variances
##
## data: F.abs.lat.HW and F.abs.form.HW
## F = 0.79707, num df = 89, denom df = 106, p-value = 0.2705
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.535722 1.194358
## sample estimates:
## ratio of variances
## 0.7970722
##
## F test to compare two variances
##
## data: F.abs.lat.SnL and F.abs.form.SnL
## F = 1.2289, num df = 89, denom df = 106, p-value = 0.3081
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8259558 1.8414155
## sample estimates:
## ratio of variances
## 1.228896
##
## F test to compare two variances
##
## data: F.abs.lat.OL and F.abs.form.OL
## F = 0.85007, num df = 89, denom df = 106, p-value = 0.4303
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5713413 1.2737690
## sample estimates:
## ratio of variances
## 0.8500684
Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.
Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).
rawF3 <- rawF2[rawF2$SPP !="p.mexicana", ]
Fglm_D <- glm(F.abs.res.D~SPP, data=rawF3)
print(summary(Fglm_D), show.residuals=TRUE)
##
## Call:
## glm(formula = F.abs.res.D ~ SPP, data = rawF3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4962 -0.4088 -0.1003 0.2814 1.5729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50579 0.03791 13.343 <2e-16 ***
## SPPp.latipinna 0.06326 0.05608 1.128 0.261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1537471)
##
## Null deviance: 30.176 on 196 degrees of freedom
## Residual deviance: 29.981 on 195 degrees of freedom
## AIC: 194.18
##
## Number of Fisher Scoring iterations: 2
Fglm_P1 <- glm(F.abs.res.P1~SPP, data=rawF3)
print(summary(Fglm_P1), show.residuals=TRUE)
##
## Call:
## glm(formula = F.abs.res.P1 ~ SPP, data = rawF3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5319 -0.3229 -0.1631 0.2616 2.8502
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50021 0.04343 11.517 <2e-16 ***
## SPPp.latipinna 0.04300 0.06425 0.669 0.504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.2018213)
##
## Null deviance: 39.446 on 196 degrees of freedom
## Residual deviance: 39.355 on 195 degrees of freedom
## AIC: 247.78
##
## Number of Fisher Scoring iterations: 2
Fglm_P1.R <- glm(F.abs.res.P1.R~SPP, data=rawF3)
print(summary(Fglm_P1.R), show.residuals=TRUE)
##
## Call:
## glm(formula = F.abs.res.P1.R ~ SPP, data = rawF3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4613 -0.3169 -0.1726 0.1641 1.5727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.46441 0.03992 11.635 <2e-16 ***
## SPPp.latipinna 0.11380 0.05906 1.927 0.0554 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1704834)
##
## Null deviance: 33.877 on 196 degrees of freedom
## Residual deviance: 33.244 on 195 degrees of freedom
## AIC: 214.54
##
## Number of Fisher Scoring iterations: 2
Fglm_LLSC <- glm(F.abs.res.LLSC~SPP, data=rawF3)
print(summary(Fglm_LLSC), show.residuals=TRUE)
##
## Call:
## glm(formula = F.abs.res.LLSC ~ SPP, data = rawF3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7863 -0.4836 -0.2411 0.3615 3.0681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.80243 0.06431 12.477 <2e-16 ***
## SPPp.latipinna -0.07077 0.09515 -0.744 0.458
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.4425906)
##
## Null deviance: 86.550 on 196 degrees of freedom
## Residual deviance: 86.305 on 195 degrees of freedom
## AIC: 402.47
##
## Number of Fisher Scoring iterations: 2
This will be performed on traits that DID vary with SL.
(FMW_D <- wilcox.test(F.abs.res.D~SPP, data=rawF3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: F.abs.res.D by SPP
## W = 4655, p-value = 0.6891
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.1669364 0.1167654
## sample estimates:
## difference in location
## -0.03080542
(FMW_P1 <- wilcox.test(F.abs.res.P1~SPP, data=rawF3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: F.abs.res.P1 by SPP
## W = 4109, p-value = 0.07674
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.22601642 0.01555044
## sample estimates:
## difference in location
## -0.1351052
(FMW_P1R <- wilcox.test(F.abs.res.P1.R~SPP, data=rawF3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: F.abs.res.P1.R by SPP
## W = 3610, p-value = 0.002513
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.21291314 -0.07332029
## sample estimates:
## difference in location
## -0.1551889
(FMW_LLSC <- wilcox.test(F.abs.res.LLSC~SPP, data=rawF3, conf.int=T))
##
## Wilcoxon rank sum test with continuity correction
##
## data: F.abs.res.LLSC by SPP
## W = 5189, p-value = 0.3488
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.06546302 0.19872269
## sample estimates:
## difference in location
## 0.05354888
I did notice that the deviance residual information was out of whack (median not super close to zero in many cases, the max and min VERY different). In the EXTRAS rscript, I ran a DHARMa test to see what the issue was, and apparently they fail the levene’s test for homogeneity of variance. This indicates that while the AVERAGE variance is not different between the two species, there could be a different in how the species are varying, which may be interesting (similar to the F-tests of the continuous residuals). Will run some levene’s test for these discrete residuals, just to see.
library(car)
library(carData)
(FL_D <- leveneTest(F.abs.res.D~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 17.115 5.229e-05 ***
## 195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FL_P1 <- leveneTest(F.abs.res.P1~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 8.3383 0.00432 **
## 195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FL_P1R <- leveneTest(F.abs.res.P1.R~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.1637 0.282
## 195
(FL_LLSC <- leveneTest(F.abs.res.LLSC~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0031 0.9555
## 195
EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.
This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.
Fvar.left.pel <- var.test(lat.F$P2.L, form.F$P2.L, alternative = "two.sided")
Fvar.left.pel
##
## F test to compare two variances
##
## data: lat.F$P2.L and form.F$P2.L
## F = NaN, num df = 89, denom df = 106, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## NaN NaN
## sample estimates:
## ratio of variances
## NaN
ggplot(rawF2, aes(SPP, P2.L)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Fvar.rig.pel <- var.test(lat.F$P2.R, form.F$P2.R, alternative = "two.sided")
Fvar.rig.pel
##
## F test to compare two variances
##
## data: lat.F$P2.R and form.F$P2.R
## F = NaN, num df = 89, denom df = 106, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## NaN NaN
## sample estimates:
## ratio of variances
## NaN
ggplot(rawF2, aes(SPP, P2.R)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Fvar.anal <- var.test(lat.F$A, form.F$A, alternative = "two.sided")
Fvar.anal
##
## F test to compare two variances
##
## data: lat.F$A and form.F$A
## F = 1.2237, num df = 89, denom df = 106, p-value = 0.3182
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8224347 1.8335655
## sample estimates:
## ratio of variances
## 1.223657
ggplot(rawF2, aes(SPP, A)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Fvar.sca.ab.ll <- var.test(lat.F$SALL, form.F$SALL, alternative = "two.sided")
Fvar.sca.ab.ll
##
## F test to compare two variances
##
## data: lat.F$SALL and form.F$SALL
## F = 0.66255, num df = 89, denom df = 106, p-value = 0.04607
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4453070 0.9927833
## sample estimates:
## ratio of variances
## 0.6625485
ggplot(rawF2, aes(SPP, SALL)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Fvar.sca.df <- var.test(lat.F$SBDF, form.F$SBDF, alternative = "two.sided")
Fvar.sca.df
##
## F test to compare two variances
##
## data: lat.F$SBDF and form.F$SBDF
## F = 0.77117, num df = 89, denom df = 106, p-value = 0.2069
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5183145 1.1555489
## sample estimates:
## ratio of variances
## 0.7711725
ggplot(rawF2, aes(SPP, SBDF)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Fvar.flu.asy <- var.test(lat.F$FLA, form.F$FLA, alternative = "two.sided")
Fvar.flu.asy
##
## F test to compare two variances
##
## data: lat.F$FLA and form.F$FLA
## F = 0.74762, num df = 89, denom df = 106, p-value = 0.1578
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5024821 1.1202516
## sample estimates:
## ratio of variances
## 0.7476163
ggplot(rawF2, aes(SPP, FLA)) +
geom_point(alpha=0.3) +
stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar",
width=0.03, colour="red", alpha=0.7) +
stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).
(FLT_P2L <- leveneTest(P2.L~SPP, data=rawF3)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 NaN NaN
## 195
(FLT_P2R <- leveneTest(P2.R~SPP, data=rawF3)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 NaN NaN
## 195
(FLT_A <- leveneTest(A~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.9255 0.3372
## 195
(FLT_SALL <- leveneTest(SALL~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 4.7837 0.02992 *
## 195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FLT_SBLL <- leveneTest(SBLL~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.7446 0.3893
## 195
(FLT_SBDF <- leveneTest(SBDF~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0845 0.7716
## 195
(FLT_FLA <- leveneTest(FLA~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.2675 0.6056
## 195
WHY WON’T THIS TABLE WORK!!!!!!!!
average.table <- matrix(c(MW_D\(p.value, mean(F.abs.lat.F.D, na.rm = TRUE), mean(F.abs.form.D, na.rm = TRUE), MW_P1\)p.value, mean(F.abs.lat.F.P1, na.rm = TRUE), mean(F.abs.form.P1, na.rm = TRUE), LT_P2R\("Pr(>F)", mean(lat.F\)P2.R, na.rm = TRUE), mean(form\(P2.R, na.rm = TRUE), LT_P2R\)“Pr(>F)”, mean(lat.F\(P2.L, na.rm = TRUE), mean(form\)P2.L, na.rm = TRUE), LT_A\("Pr(>F)", mean(lat.F\)A, na.rm = TRUE), mean(form\(A, na.rm = TRUE), MW_P1R\)p.value, mean(F.abs.lat.F.P1.R, na.rm = TRUE), mean(F.abs.form.P1.R, na.rm = TRUE), MW_LLSC\(p.value, mean(F.abs.lat.F.LLSC, na.rm = TRUE), mean(F.abs.form.LLSC, na.rm = TRUE), LT_SALL\)“Pr(>F)”, mean(lat.F\(SALL, na.rm = TRUE), mean(form\)SALL, na.rm = TRUE), LT_SBLL\("Pr(>F)", mean(F.abs.lat.F.SBLL, na.rm = TRUE), mean(F.abs.form.SBLL, na.rm = TRUE), LT_SBDF\)“Pr(>F)”, mean(lat.F\(SBDF, na.rm = TRUE), mean(form\)SBDF, na.rm = TRUE), ttest.F.abs.BD\(p.value, mean(F.abs.lat.F.BD, na.rm = TRUE), mean(F.abs.form.BD, na.rm = TRUE), ttest.F.abs.CPD\)p.value, mean(F.abs.lat.F.CPD, na.rm = TRUE), mean(F.abs.form.CPD, na.rm = TRUE), ttest.F.abs.CPL\(p.value, mean(F.abs.lat.F.CPL, na.rm = TRUE), mean(F.abs.form.CPL, na.rm = TRUE), ttest.F.abs.PreDL\)p.value, mean(F.abs.lat.F.PreDL, na.rm = TRUE), mean(F.abs.form.PreDL, na.rm = TRUE), ttest.F.abs.DbL\(p.value, mean(F.abs.lat.F.DbL, na.rm = TRUE), mean(F.abs.form.DbL, na.rm = TRUE), ttest.F.abs.HL\)p.value, mean(F.abs.lat.F.HL, na.rm = TRUE), mean(F.abs.form.HL, na.rm = TRUE), ttest.F.abs.HD\(p.value, mean(F.abs.lat.F.HD, na.rm = TRUE), mean(F.abs.form.HD, na.rm = TRUE), ttest.F.abs.HW\)p.value, mean(F.abs.lat.F.HW, na.rm = TRUE), mean(F.abs.form.HW, na.rm = TRUE), ttest.F.abs.SnL\(p.value, mean(F.abs.lat.F.SnL, na.rm = TRUE), mean(F.abs.form.SnL, na.rm = TRUE), ttest.F.abs.OL\)p.value, mean(F.abs.lat.F.OL, na.rm = TRUE), mean(F.abs.form.OL, na.rm = TRUE), LT_FLA\("Pr(>F)", mean(lat.F\)FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)
colnames(average.table)<- c(“p-value for variance”, “lat mean”, “form mean”)
rownames(average.table) <- c(“dorsal rays”, “L pect rays”, “R pelvic rays”, ”L pelvic rays”, “Anal rays”, “R pect rays”, ”lat line scales”, ”scales above LL”, ”scales below LL”, ”scales before dor”, “body dep”, “peduncle dep”, “peduncle leng”, “pre-dorsal”, “dorsal base”, “head length”, “head depth”, “head width”, “snout leng”, “orbital leng”, “fluct. asymmetries”)
average.table
plot_multi_histogram <- function(df, feature, label_column) {
plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
geom_density(alpha=0.2) +
labs(x=feature, y = "Density")
plt + guides(fill=guide_legend(title=label_column))
}
plot_multi_histogram(rawF3, "F.abs.res.CPD", "SPP")
plot_multi_histogram(rawF3, "F.abs.res.DbL", "SPP")
plot_multi_histogram(rawF3, "F.abs.res.P1", "SPP")
plot_multi_histogram(rawF3, "F.abs.res.P1.R", "SPP")
LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.
In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.
(chart <- matrix(c("Variable chart:", "D: dorsal ray count", "P1: left pectoral ray count", "P2.R: right pelvic rays", "P2.L: left pelvic rays", "A: anal rays", "P1.R: right pectoral ray count", "LLSC: lateral line scale count", "SALL: scales above lateral line", "SBLL: scales below lateral line", "SBDF: scales before dorsal fin", "TL: total length", "SL: standard length", "BD: body depth", "CPD: caudal peduncle depth", "CPL: caudal peduncle length", "PreDL: pre-dorsal length", "DbL: dorsal base length", "HL/HW/HD: head length/width/depth", "SnL: snout length", "OL: ocular length"), ncol=1, byrow=TRUE))
## [,1]
## [1,] "Variable chart:"
## [2,] "D: dorsal ray count"
## [3,] "P1: left pectoral ray count"
## [4,] "P2.R: right pelvic rays"
## [5,] "P2.L: left pelvic rays"
## [6,] "A: anal rays"
## [7,] "P1.R: right pectoral ray count"
## [8,] "LLSC: lateral line scale count"
## [9,] "SALL: scales above lateral line"
## [10,] "SBLL: scales below lateral line"
## [11,] "SBDF: scales before dorsal fin"
## [12,] "TL: total length"
## [13,] "SL: standard length"
## [14,] "BD: body depth"
## [15,] "CPD: caudal peduncle depth"
## [16,] "CPL: caudal peduncle length"
## [17,] "PreDL: pre-dorsal length"
## [18,] "DbL: dorsal base length"
## [19,] "HL/HW/HD: head length/width/depth"
## [20,] "SnL: snout length"
## [21,] "OL: ocular length"
library(stringr)
rawFa <- subset(rawF, select = -c(17:18) ) #took out P2R and L since they don't vary at all
rawFa <- subset(rawFa, select=-c(34:49))#took out transformed values
rawFa <- raw1a[rawFa$SPP !="p.mexicana", ]
#PCA <- prcomp(raw1a[, 15:32], center=TRUE, scale. = TRUE) #includes all but pelvic traits, since they are the same in all species WILL SCALE/CENTER DATA ON MY OWN, LOGAN SAID IT IS NOT ALWAYS DOING IT FOR YOU EVEN WHEN YOU SPECIFY
z.scores <- scale(rawFa[, 15:33], center = TRUE, scale = TRUE)
rawFa <- cbind(rawFa, z.scores)
colnames(rawFa)[34:52] <- str_c( "z_", colnames(rawFa)[15:33] )
PCA <- prcomp(rawFa[, 34:52])
summary(PCA)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.0451 1.5955 1.15877 1.02890 0.97933 0.94553 0.86794
## Proportion of Variance 0.4857 0.1333 0.07034 0.05545 0.05024 0.04683 0.03946
## Cumulative Proportion 0.4857 0.6191 0.68942 0.74487 0.79511 0.84194 0.88140
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.78450 0.61186 0.54313 0.52632 0.48700 0.35838 0.3211
## Proportion of Variance 0.03224 0.01961 0.01545 0.01451 0.01242 0.00673 0.0054
## Cumulative Proportion 0.91364 0.93325 0.94871 0.96322 0.97564 0.98237 0.9878
## PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.27041 0.23437 0.22581 0.21238 0.09658
## Proportion of Variance 0.00383 0.00288 0.00267 0.00236 0.00049
## Cumulative Proportion 0.99160 0.99448 0.99715 0.99951 1.00000
(loadings <- PCA$rotation[, 1:5])
## PC1 PC2 PC3 PC4 PC5
## z_D -0.024932852 0.539919430 0.262981450 0.0019549264 0.008897738
## z_P1 -0.011656903 -0.364676112 0.443470295 0.1665280085 0.175868336
## z_A -0.005938808 -0.002860544 0.522883245 0.3416997918 -0.609219674
## z_P1.R -0.027301766 -0.387538257 0.362832009 0.2093349299 0.182325271
## z_LLSC -0.079403940 0.294129806 0.210958862 0.0246490941 0.286848280
## z_SALL -0.024696704 -0.004597863 -0.464372603 0.7554034917 -0.105957519
## z_SBLL -0.056053464 0.116645415 0.062223759 0.3844194681 0.651593233
## z_SBDF -0.013615614 -0.438467487 -0.127592911 -0.2029812456 0.077389865
## z_SL -0.326364918 -0.059288958 -0.002535681 -0.0297115833 -0.004742763
## z_BD -0.317192963 0.075390220 0.002383938 0.0043117059 0.002359419
## z_CPD -0.321195688 -0.024621393 -0.002873030 -0.0074220409 -0.009362872
## z_CPL -0.312709228 -0.076204787 0.024859254 -0.0093059666 0.037881779
## z_PreDL -0.303629075 -0.193147718 -0.056181828 0.0009717231 -0.031493451
## z_DbL -0.269649396 0.273382827 0.106580535 -0.0193731390 0.016715436
## z_HL -0.284051341 -0.040427376 0.056423387 -0.1302793984 -0.016080279
## z_HD -0.318660375 0.005541232 -0.027405835 -0.0908654949 -0.015562966
## z_HW -0.317787088 -0.043275808 -0.088359244 0.0590131634 -0.003378493
## z_SnL -0.215412281 0.037698555 -0.147851108 0.1388268838 -0.187343848
## z_OL -0.292605987 0.017186401 0.008213101 -0.0453759518 -0.045785030
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1"
myXlab <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")
sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2"
myXlab <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")
sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3"
myXlab <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")
VM_PCA <- varimax(PCA$rotation)
VM_PCA
## $loadings
##
## Loadings:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## z_D -1
## z_P1 -1
## z_A 1
## z_P1.R 1
## z_LLSC 1
## z_SALL 1
## z_SBLL 1
## z_SBDF -1
## z_SL
## z_BD -1
## z_CPD
## z_CPL -1
## z_PreDL
## z_DbL 1
## z_HL -1
## z_HD 1
## z_HW 1
## z_SnL -1
## z_OL 1
## PC17 PC18 PC19
## z_D
## z_P1
## z_A
## z_P1.R
## z_LLSC
## z_SALL
## z_SBLL
## z_SBDF
## z_SL 1
## z_BD
## z_CPD 1
## z_CPL
## z_PreDL -1
## z_DbL
## z_HL
## z_HD
## z_HW
## z_SnL
## z_OL
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474 0.526
## PC11 PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.579 0.632 0.684 0.737 0.789 0.842 0.895 0.947 1.000
##
## $rotmat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.317192964 0.01361561 -0.005938765 -0.024696704 -0.056053464
## [2,] -0.075390220 0.43846744 -0.002859942 -0.004597863 0.116645418
## [3,] -0.002383938 0.12759292 0.522882685 -0.464372603 0.062223761
## [4,] -0.004311706 0.20298125 0.341699471 0.755403491 0.384419470
## [5,] -0.002359419 -0.07738984 -0.609219961 -0.105957519 0.651593227
## [6,] -0.045148499 0.49511106 -0.451288392 0.137933549 -0.500042036
## [7,] 0.003697336 -0.31211509 0.013080354 0.329245578 -0.382418060
## [8,] -0.089427151 0.16037263 0.074395675 0.128165522 0.067310423
## [9,] 0.101737290 0.11145902 0.044513214 -0.042236960 0.006599946
## [10,] -0.078810024 -0.41558255 -0.028241436 0.032406575 -0.058939919
## [11,] 0.008167391 0.39228497 0.093520019 -0.182802569 0.022934029
## [12,] 0.214480339 0.12138151 -0.119907151 0.106777307 -0.017817769
## [13,] -0.139953638 -0.08474196 0.011126420 0.042182980 -0.026130449
## [14,] 0.151587724 -0.10318157 0.008286893 0.066711962 0.017705934
## [15,] 0.555251254 -0.00487988 0.016279587 0.038460416 0.017549080
## [16,] 0.520809165 -0.02743618 0.016133802 -0.044287292 -0.004227632
## [17,] -0.159821245 -0.05527441 0.051101404 0.003135373 0.021106911
## [18,] 0.415031205 0.01592530 -0.003388165 0.023335505 0.006711509
## [19,] -0.033016284 0.01241799 0.005690672 0.011563679 0.006171570
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.027301775 -0.0794039404 2.154123e-01 0.011656905 0.284051341
## [2,] -0.387538262 0.2941298061 -3.769856e-02 0.364676141 0.040427377
## [3,] 0.362832761 0.2109588623 1.478511e-01 -0.443470335 -0.056423387
## [4,] 0.209335425 0.0246490942 -1.388269e-01 -0.166528032 0.130279398
## [5,] 0.182324350 0.2868482804 1.873438e-01 -0.175868329 0.016080279
## [6,] 0.335980270 -0.0763556014 -1.287215e-02 -0.292565663 0.132368073
## [7,] -0.041310041 0.7181261476 3.314511e-01 -0.040125531 -0.092874716
## [8,] -0.057730839 -0.3579994212 8.685278e-01 0.087690256 -0.082140221
## [9,] 0.615008492 0.0867405269 4.011526e-05 0.663511870 -0.280479798
## [10,] 0.337817076 -0.1193484427 2.935007e-02 0.213636252 0.559533564
## [11,] -0.029665589 0.3087437940 5.586866e-02 0.144558136 0.442142739
## [12,] 0.080364845 -0.0404628525 -5.848374e-02 -0.061496400 -0.440587187
## [13,] 0.117580764 0.0497147748 -2.829132e-02 -0.012828459 -0.154043043
## [14,] -0.070243575 -0.0796280368 -2.864770e-02 0.002421167 -0.049072590
## [15,] -0.009336747 -0.0074529031 -2.073828e-02 -0.051178993 0.170510697
## [16,] -0.060067667 0.0002631512 3.262493e-02 -0.016559794 -0.138780628
## [17,] -0.001581433 -0.0280843947 -4.949775e-03 0.032237273 -0.006483009
## [18,] 0.006487336 0.0411940446 3.213751e-02 0.043537886 0.092169601
## [19,] 0.015551107 0.0030274690 -1.932224e-03 0.009374523 -0.002988417
## [,11] [,12] [,13] [,14] [,15]
## [1,] 0.024932852 -0.292605987 0.312709018 -0.26964964 -0.3186603741
## [2,] -0.539919446 0.017186401 0.076205000 0.27338277 0.0055412322
## [3,] -0.262981455 0.008213101 -0.024859170 0.10658055 -0.0274058354
## [4,] -0.001954934 -0.045375952 0.009305952 -0.01937315 -0.0908654949
## [5,] -0.008897735 -0.045785030 -0.037881766 0.01671546 -0.0155629661
## [6,] -0.100377130 -0.097604922 -0.097826145 0.12876333 -0.0193877082
## [7,] -0.017950729 -0.011551806 -0.062346207 -0.04812401 0.0002916825
## [8,] 0.010150022 0.092705473 -0.034210721 0.09096144 0.1012905982
## [9,] 0.063104640 -0.216681286 -0.086215007 -0.03390108 0.0046681487
## [10,] -0.357344245 0.403372987 0.043647260 0.14554354 -0.0775913615
## [11,] 0.560746419 0.381176775 -0.086975512 -0.09773309 -0.0607385653
## [12,] -0.123679016 0.720859602 0.115572202 -0.25760286 -0.0406999395
## [13,] 0.360751613 0.075125985 0.516997516 0.50084920 0.1996663422
## [14,] 0.169437372 0.046142002 -0.513370774 0.51674758 -0.3352709457
## [15,] -0.008020949 0.001984753 -0.109735915 0.15477548 0.7089616098
## [16,] -0.005103206 0.055889563 -0.060487120 0.14176831 -0.3828537634
## [17,] -0.006112682 0.005716975 -0.478435196 -0.35361554 0.2344767945
## [18,] -0.080340170 -0.077549701 0.147055837 -0.09092231 0.0758503405
## [19,] 0.035764536 -0.008193072 0.228728655 -0.14100072 -0.0702989971
## [,16] [,17] [,18] [,19]
## [1,] -0.317787087 0.3036290750 -0.321195688 -0.3263649183
## [2,] -0.043275808 0.1931477177 -0.024621393 -0.0592889579
## [3,] -0.088359244 0.0561818277 -0.002873030 -0.0025356810
## [4,] 0.059013163 -0.0009717231 -0.007422041 -0.0297115833
## [5,] -0.003378493 0.0314934515 -0.009362872 -0.0047427634
## [6,] 0.053471808 0.0768420413 0.055700343 0.0104193683
## [7,] -0.017472978 -0.0271257532 -0.045562365 0.0216827690
## [8,] 0.055229510 -0.0467419837 0.052205382 0.0561780924
## [9,] -0.021685556 -0.0436404770 -0.081664608 0.0126585823
## [10,] 0.067995931 0.0557843061 0.023786489 -0.0003176912
## [11,] 0.048645989 -0.0621211885 0.026631209 0.0131554086
## [12,] -0.246762131 0.0475860798 -0.115191245 -0.1049781997
## [13,] 0.006109412 0.3924016623 0.142603532 -0.2480200900
## [14,] -0.496053674 0.1322404878 -0.012961234 0.0973935063
## [15,] -0.004805014 0.0379015427 -0.326649647 0.1024801611
## [16,] 0.697408673 0.1893614834 0.075007150 -0.0145714433
## [17,] 0.063477585 0.6675786465 0.231575851 -0.2326512180
## [18,] -0.267689999 -0.1135970443 0.824148844 0.0428272847
## [19,] -0.055451432 0.4231578208 -0.035785300 0.8580768091
library(AMR)
library(ggplot2)
library(ggfortify)
(PCA$sdev ^ 2)
## [1] 9.272818630 2.545610115 1.342747565 1.058640141 0.959082927 0.894031302
## [7] 0.753322740 0.615448006 0.374369029 0.294989892 0.277013221 0.237166015
## [13] 0.128439698 0.103116158 0.073124222 0.054927000 0.050991028 0.045106892
## [19] 0.009327566
evplot <- function(ev)
{
# Broken stick model (MacArthur 1957)
n <- length(ev)
bsm <- data.frame(j=seq(1:n), p=0)
bsm$p[1] <- 1/n
for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
bsm$p <- 100*bsm$p/n
# Plot eigenvalues and % of variation for each axis
op <- par(mfrow=c(2,1))
barplot(ev, main="Eigenvalues", col="bisque", las=2)
abline(h=mean(ev), col="red")
legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE,
main="% variation", col=c("bisque",2), las=2)
legend("topright", c("% eigenvalue", "Broken stick model"),
pch=15, col=c("bisque",2), bty="n")
par(op)
}
ev <- PCA$sdev^2
evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot... not 100% confident I know what this means, but pretty sure PC1 is body size
plotF1<- autoplot(PCA, data = rawFa, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF1
plotF2<- autoplot(PCA, data = rawFa, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF2
plotF3<- autoplot(PCA, data = rawFa, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF3
plotF4<- autoplot(PCA, data = rawFa, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF4
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plotF5<- autoplot(PCA, x=2, y=3, data = rawFa, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF5
plotF6<- autoplot(PCA, x=2, y=3, data = rawFa, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF6
plotF7<- autoplot(PCA, x=2, y=3, data = rawFa, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF7
plotF8<- autoplot(PCA, x=2, y=3, data = rawFa, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF8
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
I will try to compare the area of the density clusters for the PCA. I will attempt to do this use EMD (earth mover’s distance). From what I understand, this will basically tell me how much “work” is needed to transform one distribution into another, thus providing a metric of the overall difference in shape between two distributions.
To do this, I would need to be able to separate the loadings based on species, but I have no idea how to do that without running two separate PCAs, one for each group (which sorta defeats the point I think).
library(emdist) OR library(energy)